% Preamble of the Docutils-generated LaTeX source.
% Each declaration sits on its own line: a `%' comment runs to the end of the
% physical line, so comments must never share a line with code that follows.
\documentclass[10pt,english]{article}
\usepackage{babel}
\usepackage{shortvrb}
\usepackage[latin1]{inputenc}
\usepackage{tabularx}
\usepackage{longtable}
\setlength{\extrarowheight}{2pt}
\usepackage{amsmath}
\usepackage{graphicx}
\usepackage{color}
\usepackage{multirow}
\usepackage[colorlinks=true,linkcolor=blue,urlcolor=blue]{hyperref}
\usepackage[a4paper]{geometry}
%% generator Docutils: http://docutils.sourceforge.net/

% Lengths used by the document body (docinfo table, longtable column widths).
\newlength{\admonitionwidth}
\setlength{\admonitionwidth}{0.9\textwidth}
\newlength{\docinfowidth}
\setlength{\docinfowidth}{0.9\textwidth}
\newlength{\locallinewidth}

% Label of an option-list entry: bold text, flushed left by the trailing \hfill.
% \normalfont\bfseries is the LaTeX2e equivalent of the obsolete {\bf ...}.
\newcommand{\optionlistlabel}[1]{\normalfont\bfseries #1 \hfill}

% optionlist environment; #1 is the width reserved for the labels.
% The left margin is the right margin (1cm) plus label width plus label separation.
\newenvironment{optionlist}[1]
{\begin{list}{}
  {\setlength{\labelwidth}{#1}
   \setlength{\rightmargin}{1cm}
   \setlength{\leftmargin}{\rightmargin}
   \addtolength{\leftmargin}{\labelwidth}
   \addtolength{\leftmargin}{\labelsep}
   \renewcommand{\makelabel}{\optionlistlabel}}
}{\end{list}}

% begin: floats for footnotes tweaking.
\setlength{\floatsep}{0.5em}
\setlength{\textfloatsep}{\fill}
\addtolength{\textfloatsep}{3em}
\renewcommand{\textfraction}{0.5}
\renewcommand{\topfraction}{0.5}
\renewcommand{\bottomfraction}{0.5}
\setcounter{totalnumber}{50}
\setcounter{topnumber}{50}
\setcounter{bottomnumber}{50}
% end floats for footnotes

% some commands, that could be overwritten in the style file.
\newcommand{\rubric}[1]{\subsection*{~\hfill {\it #1} \hfill ~}}
\newcommand{\titlereference}[1]{\textsl{#1}}
% end of "some commands"
\input{style.tex}
\title{OBJECT ORIENTED PROGRAMMING IN PYTHON}
\author{}
\date{}
\hypersetup{
pdftitle={OBJECT ORIENTED PROGRAMMING IN PYTHON},
pdfauthor={Michele Simionato}
}
\raggedbottom
\begin{document}
\maketitle

%___________________________________________________________________________

\begin{center}
\begin{tabularx}{\docinfowidth}{lX}
\textbf{Version}: & 0.5 \\
\textbf{Author}: & Michele Simionato \\
\textbf{E-mail}: & mis6@pitt.edu \\
\textbf{Home-page}: & http://www.phyast.pitt.edu/{\textasciitilde}micheles/ \\
\textbf{Disclaimer}: & I release this book to the general public.
It can be freely distributed if unchanged.
As usual, I don't give any warranty: while I have tried hard to ensure the
correctness of what follows, I disclaim any responsibility in case of errors.
Use it at your own risk and peril! \\
\end{tabularx}
\end{center}
\setlength{\locallinewidth}{\linewidth}
\hypertarget{contents}{}
\pdfbookmark[0]{Contents}{contents}
\subsection*{~\hfill Contents\hfill ~}
\begin{list}{}{}
\item {} \href{\#preface}{Preface}
\begin{list}{}{}
\item {} \href{\#the-philosophy-of-this-book}{The philosophy of this book}
\item {} \href{\#for-who-this-book-in-intended}{For whom this book is intended}
\item {} \href{\#about-the-scripts-in-this-book}{About the scripts in this book}
\item {} \href{\#conventions-used-in-this-book}{Conventions used in this book}
\end{list}
\item {} \href{\#introduction}{Introduction}
\begin{list}{}{}
\item {} \href{\#why-oop}{Why OOP ?}
\item {} \href{\#why-python}{Why Python ?}
\item {} \href{\#further-thoughts}{Further thoughts}
\end{list}
\item {} \href{\#first-things-first}{FIRST THINGS, FIRST}
\begin{list}{}{}
\item {} \href{\#what-s-an-object}{What's an object?}
\item {} \href{\#objects-and-classes}{Objects and classes}
\item {} \href{\#objects-have-attributes}{Objects have attributes}
\item {}
\href{\#objects-have-methods}{Objects have methods} \item {} \href{\#summing-objects}{Summing objects} \item {} \href{\#inspecting-objects}{Inspecting objects} \item {} \href{\#built-in-objects-iterators-and-generators}{Built-in objects: iterators and generators} \end{list} \item {} \href{\#the-convenience-of-functions}{THE CONVENIENCE OF FUNCTIONS} \begin{list}{}{} \item {} \href{\#id16}{Introduction} \item {} \href{\#a-few-useful-functions}{A few useful functions} \item {} \href{\#functions-are-objects}{Functions are objects} \item {} \href{\#profiling-functions}{Profiling functions} \item {} \href{\#about-python-speed}{About Python speed} \item {} \href{\#tracing-functions}{Tracing functions} \item {} \href{\#tracing-objects}{Tracing objects} \item {} \href{\#inspecting-functions}{Inspecting functions} \end{list} \item {} \href{\#the-beauty-of-objects}{THE BEAUTY OF OBJECTS} \begin{list}{}{} \item {} \href{\#user-defined-objects}{User defined objects} \item {} \href{\#objects-have-static-methods-and-classmethods}{Objects have static methods and classmethods} \item {} \href{\#objects-have-their-privacy}{Objects have their privacy} \item {} \href{\#objects-have-properties}{Objects have properties} \item {} \href{\#objects-have-special-methods}{Objects have special methods} \item {} \href{\#objects-can-be-called-added-subtracted}{Objects can be called, added, subtracted, ...} \end{list} \item {} \href{\#the-power-of-classes}{THE POWER OF CLASSES} \begin{list}{}{} \item {} \href{\#the-concept-of-inheritance}{The concept of inheritance} \item {} \href{\#inheritance-versus-run-time-class-modifications}{Inheritance versus run-time class modifications} \item {} \href{\#inheriting-from-built-in-types}{Inheriting from built-in types} \item {} \href{\#controlling-the-creation-of-objects}{Controlling the creation of objects} \item {} \href{\#multiple-inheritance}{Multiple Inheritance} \item {} \href{\#cooperative-hierarchies}{Cooperative hierarchies} \item {} 
\href{\#inheritance-and-privacy}{Inheritance and privacy} \end{list} \item {} \href{\#the-sophistication-of-descriptors}{THE SOPHISTICATION OF DESCRIPTORS} \begin{list}{}{} \item {} \href{\#motivation}{Motivation} \item {} \href{\#functions-versus-methods}{Functions versus methods} \item {} \href{\#methods-versus-functions}{Methods versus functions} \item {} \href{\#static-methods-and-class-methods}{Static methods and class methods} \item {} \href{\#properties}{Properties} \item {} \href{\#user-defined-attribute-descriptors}{User-defined attribute descriptors} \item {} \href{\#data-descriptors}{Data descriptors} \item {} \href{\#the-super-attribute-descriptor}{The \texttt{super} attribute descriptor} \item {} \href{\#method-wrappers}{Method wrappers} \end{list} \item {} \href{\#the-subtleties-of-multiple-inheritance}{THE SUBTLETIES OF MULTIPLE INHERITANCE} \begin{list}{}{} \item {} \href{\#a-little-bit-of-history-why-python-2-3-has-changed-the-mro}{A little bit of history: why Python 2.3 has changed the MRO} \item {} \href{\#the-c3-method-resolution-order}{The C3 Method Resolution Order} \item {} \href{\#examples}{Examples} \item {} \href{\#bad-method-resolution-orders}{Bad Method Resolution Orders} \item {} \href{\#understanding-the-method-resolution-order}{Understanding the Method Resolution Order} \item {} \href{\#counting-instances}{Counting instances} \item {} \href{\#the-pizza-shop-example}{The pizza-shop example} \item {} \href{\#fixing-wrong-hierarchies}{Fixing wrong hierarchies} \item {} \href{\#modifying-hierarchies}{Modifying hierarchies} \item {} \href{\#inspecting-python-code}{Inspecting Python code} \end{list} \item {} \href{\#the-magic-of-metaclasses-part-i}{THE MAGIC OF METACLASSES - PART I} \begin{list}{}{} \item {} \href{\#metaclasses-as-class-factories}{Metaclasses as class factories} \item {} \href{\#metaclasses-as-class-modifiers}{Metaclasses as class modifiers} \item {} \href{\#a-few-caveats-about-the-usage-of-metaclasses}{A few caveats about 
the usage of metaclasses} \item {} \href{\#metaclasses-and-inheritance}{Metaclasses and inheritance} \item {} \href{\#conflicting-metaclasses}{Conflicting metaclasses} \item {} \href{\#cooperative-metaclasses}{Cooperative metaclasses} \item {} \href{\#metamethods-vs-class-methods}{Metamethods vs class methods} \end{list} \item {} \href{\#the-magic-of-metaclasses-part-2}{THE MAGIC OF METACLASSES - PART 2} \begin{list}{}{} \item {} \href{\#the-secrets-of-the-metaclass-hook}{The secrets of the \texttt{{\_}{\_}metaclass{\_}{\_}} hook} \item {} \href{\#anonymous-inner-metaclasses}{Anonymous inner metaclasses} \item {} \href{\#passing-parameters-to-meta-classes}{Passing parameters to (meta) classes} \item {} \href{\#meta-functions}{Meta-functions} \item {} \href{\#anonymous-cooperative-super-calls}{Anonymous cooperative super calls} \item {} \href{\#more-on-metaclasses-as-class-factories}{More on metaclasses as class factories} \item {} \href{\#programming-with-metaclasses}{Programming with metaclasses} \item {} \href{\#metaclass-aided-operator-overloading}{Metaclass-aided operator overloading} \end{list} \item {} \href{\#advanced-metaprogramming-techniques}{ADVANCED METAPROGRAMMING TECHNIQUES} \begin{list}{}{} \item {} \href{\#on-code-processing}{On code processing} \item {} \href{\#regular-expressions}{Regular expressions} \item {} \href{\#more-on-metaclasses-and-subclassing-built-in-types}{More on metaclasses and subclassing built-in types} \item {} \href{\#a-simple-state-machine}{A simple state machine} \item {} \href{\#creating-classes}{Creating classes} \item {} \href{\#modifying-modules}{Modifying modules} \item {} \href{\#metaclasses-and-attribute-descriptors}{Metaclasses and attribute descriptors} \item {} \href{\#id46}{Modifying hierarchies} \item {} \href{\#tracing-hierarchies}{Tracing hierarchies} \item {} \href{\#modifying-source-code}{Modifying source code} \item {} \href{\#metaclass-regenerated-hierarchies}{Metaclass regenerated hierarchies} \end{list} 
\item {} \href{\#the-programmable-programming-language}{THE PROGRAMMABLE PROGRAMMING LANGUAGE}
\begin{list}{}{}
\item {} \href{\#enhancing-the-python-language}{Enhancing the Python language}
\item {} \href{\#restricting-python-dynamism}{Restricting Python dynamism}
\item {} \href{\#changing-the-language-without-changing-the-language}{Changing the language without changing the language}
\item {} \href{\#recognizing-magic-comments}{Recognizing magic comments}
\item {} \href{\#interpreting-python-source-code-on-the-fly}{Interpreting Python source code on the fly}
\item {} \href{\#implementing-lazy-evaluation}{Implementing lazy evaluation}
\item {} \href{\#implementing-a-ternary-operator}{Implementing a ternary operator}
\end{list}
\end{list}
% NOTE(review): the article class defines no `chapter' counter; the next line
% compiles only if style.tex declares one -- confirm.
\setcounter{chapter}{-1}

%___________________________________________________________________________

\hypertarget{preface}{}
\pdfbookmark[0]{Preface}{preface}
\section*{Preface}
\begin{quote}
\begin{flushleft}
\emph{There~is~only~one~way~to~learn:~through~examples}
\end{flushleft}
\end{quote}

%___________________________________________________________________________

\hypertarget{the-philosophy-of-this-book}{}
\pdfbookmark[1]{The philosophy of this book}{the-philosophy-of-this-book}
\subsection*{The philosophy of this book}
This book is written with the intent to help the programmer go through the fascinating concepts of Object Oriented Programming (OOP), in their Python incarnation.
Notice that I say to help, not to teach.
Actually, I do not think that a book can teach OOP or any other non-trivial matter in Computer Science or other disciplines.
Only practice can teach: practice, then practice, and practice again.
You must learn by yourself from your experiments, not from the books.
Nevertheless, books are useful.
They cannot teach, but they can help.
They should give you new ideas that you were not thinking about, they should show tricks you do not find in the manual, and in general they should be of some guidance in the uphill road to knowledge.
That is the philosophy of this book.
For this reason:
1. It is not comprehensive, not systematic; it is intended to give ideas and a basis: from that the reader is expected to cover the missing part on his own, browsing the documentation, other sources and other books, and finally the definitive authority, the source itself.
2. It will not even try to teach the \emph{best} practices. I will show what you can do with Python, not what you ``should'' do. Often I will show solutions that are not recommended. I am not a mommy saying this is good, this is bad, do this do that.
3. You can only learn from your failures. If you think ``it should work, if I do X and Y'' and it works, then you have learned nothing new. You have merely verified that your previous knowledge was correct, but you haven't created any new knowledge. On the other hand, when you think ``it should work, if I do X and Y'' and it doesn't, then you have learned that your previous knowledge was wrong or incomplete, and you are forced to learn something new to overcome the difficulty. For this reason, I think it is useful to report not only how to do something, but also to report how not to do something, showing the pitfalls of wrong approaches.
That, in my opinion, is the goal of a good book.
I don't know if I have reached this goal or not (the decision is up to the reader), but at least I have tried to follow these guidelines.
Moreover, this is not a book on OOP, it is a book on OOP \emph{in Python}.
In other words, the point of view of this book is not to emphasize general topics of OOP that are exportable to other languages, but exactly the opposite: I want to emphasize specific techniques that one can only use in Python, or that are difficult to translate to other languages.
Moreover, I will not provide comparisons with other languages (except for the section ``Why Python?'' in this introduction and in a few other selected places), in order to keep the discussion focused.
This choice comes from the initial motivation for this book, which was to fill a gap in the (otherwise excellent) Python documentation.
The problem is that the available documentation still lacks an accessible reference of the new Python 2.2+ object-oriented features.
Since I myself have learned Python and OOP from scratch, I have decided to write this book in order to fill that gap and help others.
The emphasis in this book is not on giving solutions to specific problems (even if most of the recipes of this book can easily be tailored to solve real life concrete problems), it is on teaching how it works, why it works in some cases and why it does not work in some other cases.
Avoiding too specific problems has an additional bonus, since it allows me to use \emph{short} examples (the majority of the scripts presented here are under 20-30 lines) which I think are best suited to teach a new matter [\hyperlink{id2}{1}].
Notice, however, that whereas the majority of the scripts in this book are short, it is also true that they are pretty \emph{dense}.
The density is due to various reasons:
\newcounter{listcnt1}
\begin{list}{\arabic{listcnt1}.}
{
\usecounter{listcnt1}
\setlength{\rightmargin}{\leftmargin}
}
\item {} I am defining a lot of helper functions and classes, that are reused and enhanced throughout the book.
\item {} I make heavy use of inheritance, therefore a script at the end of the book can inherit from the classes defined throughout the book;
\item {} A ten line script involving metaclasses can easily perform the equivalent of generating hundreds of lines of code in a language without metaclasses such as Java or C++.
\end{list}
To my knowledge, there are no other books covering the same topics with the same focus (be warned, however, that I haven't read so many Python books ;-).
The two references that come closest to the present book are the \texttt{Python Cookbook} by Alex Martelli and David Ascher, and Alex Martelli's \texttt{Python in a Nutshell}.
They are quite recent books and therefore they cover (in much less detail) some of the 2.2 features that are the central topics of this book.
However, the Cookbook devotes only one chapter to OOP and has a quite different philosophy from the present book, therefore there is practically no overlap.
Also \texttt{Python in a Nutshell} covers metaclasses in a few pages, whereas half of this book is essentially dedicated to them.
This means that you can read both ;-)
\begin{figure}[b]\hypertarget{id2}[1]
Readers that prefer the opposite philosophy of using longer, real life-like, examples, already have the excellent ``Dive into Python'' book \href{http://diveintopython.org/}{http://diveintopython.org/} at their disposal.
This is a very good book that I certainly recommend to any (experienced) Python programmer; it is also freely available (just like this ;-).
However, the choice of topics is quite different and there is essentially no overlap between my book and ``Dive into Python'' (therefore you can read both ;-).
\end{figure}

%___________________________________________________________________________

\hypertarget{for-who-this-book-in-intended}{}
\pdfbookmark[1]{For whom this book is intended}{for-who-this-book-in-intended}
\subsection*{For whom this book is intended}
I have tried to make this tutorial useful to a large audience of Pythonistas, i.e. both people with no previous experience of Object Oriented Programming and people with experience in OOP, but unfamiliar with the most recent Python 2.2-2.3 features (such as attribute descriptors, metaclasses, change of the MRO in multiple inheritance, etc).
However, this is not a book for beginners: the non-experienced reader should check (at least) the Internet sites www.python.org/doc/Newbies.html and www.awaretek.com, that provide a nice collection of resources for Python newbies.
These are my recommendations for the reader, according to her/his level:
\newcounter{listcnt2}
\begin{list}{\arabic{listcnt2}.}
{
\usecounter{listcnt2}
\setlength{\rightmargin}{\leftmargin}
}
\item {} If you are an absolute beginner, with no experience of programming, this book is \emph{not} for you (yet ;-).
Go to \href{http://www.python.org/doc/Newbies.html}{http://www.python.org/doc/Newbies.html} and read one of the introductory texts listed there, then come back here.
I recommend ``How to Think Like a Computer Scientist'', available for free on the net (see \href{http://www.ibiblio.org/obp/thinkCSpy/}{http://www.ibiblio.org/obp/thinkCSpy/}); I found it useful myself when I started learning Python; be warned, however, that it refers to the rather old Python version 1.5.2.
There are also excellent books on the market (see \href{http://www.awaretek.com/plf.html}{http://www.awaretek.com/plf.html}).
\href{http://www.uselesspython.com/}{http://www.uselesspython.com/} is a good resource to find reviews of available Python books.
For free books, look at \href{http://www.tcfb.com/freetechbooks/bookphyton.html}{http://www.tcfb.com/freetechbooks/bookphyton.html}.
This is \emph{not} another Python tutorial.
\item {} If you already know (at least) one other programming language, but you don't know Python, then this book is \emph{not} for you (again ;-).
Read the FAQ, the Python Tutorial and play a little with the Standard Library (all this material can be downloaded for free from \href{http://www.python.org}{http://www.python.org}), then come back here.
\item {} If you have passed steps 1 and 2, and you are comfortable with Python at the level of simple procedural programming, but have no clue about objects and classes, \emph{then} this book is for you.
Read this book till the end and your knowledge of OOP will pass from zero to a quite advanced level (hopefully).
Of course, you will have to play with the code in this book and write a lot of code on your own, first ;-)
\item {} If you are comfortable with Python and you also know OOP from other languages or from earlier versions of Python, then this book is for you, too: you are ready to read the more advanced chapters.
\item {} If you are a Python guru, then you should read the book, too.
I expect you will find the errors and send me feedback, helping me to improve this tutorial.
\end{list}

%___________________________________________________________________________

\hypertarget{about-the-scripts-in-this-book}{}
\pdfbookmark[1]{About the scripts in this book}{about-the-scripts-in-this-book}
\subsection*{About the scripts in this book}
All the scripts in this book are free.
You are expected to play with them, to modify them and to improve them.
In order to facilitate the extraction of the scripts from the main text, both visually for the reader and automatically for Python, I use the convention of sandwiching the body of the example scripts in blocks like this
\begin{quote}
\begin{ttfamily}\begin{flushleft}
\mbox{{\#}<myfirstscript.py>}\\
\mbox{}\\
\mbox{print~"Here~Starts~the~Python~Way~to~Object~Oriented~Programming~!"}\\
\mbox{}\\
\mbox{{\#}</myfirstscript.py>}
\end{flushleft}\end{ttfamily}
\end{quote}
You may extract the source of this script with a Python program called ``test.py'', provided in the distribution.
Simply give the following command:
\begin{quote}
\begin{ttfamily}\begin{flushleft}
\mbox{{\$}~python~test.py~myfirstscript.py}
\end{flushleft}\end{ttfamily}
\end{quote}
This will create a file called ``myfirstscript.py'', containing the source of \texttt{myfirstscript.py}; moreover it will execute the script and write its output in a file called ``output.txt''.
I have tested all the scripts in this tutorial under Red Hat Linux 7.x and Windows 98SE.
You should not have any problem in running them, but if a problem is there, ``test.py'' will probably discover it, even if, unfortunately, it will not provide the solution :-(.
Notice that test.py requires Python 2.3+ to work, since most of the examples in this book heavily depend on the new features introduced in Python 2.2-2.3.
Since the installation of Python 2.3 is simple, quick and free, I think I am asking very little effort of my readers who haven't upgraded yet.
This is well worth the pain since Python 2.3 fixes a few bugs of 2.2 (notably in the subject of attribute descriptors and the \texttt{super} built-in).
% NOTE(review): the original sentence trails off here ("... that makes");
% the rest of the clause is missing from the source.

You may give more arguments to test.py, as in this example:
\begin{quote}
\begin{ttfamily}\begin{flushleft}
\mbox{{\$}~python~test.py~myfirstscript.py~mysecondscript.py}
\end{flushleft}\end{ttfamily}
\end{quote}
The output of both scripts will still be placed in the file ``output.txt''.
Notice that if you give an argument which is not the name of a script in the book, it will be simply ignored.
Moreover, if you do not give any argument, ``test.py'' will automatically execute all the tutorial scripts, writing their output in ``output.txt'' [\hyperlink{id4}{2}].
You may want to take a look at this file, once you have finished the tutorial.
It also contains the source code of the scripts, for better readability.
Many examples of this tutorial depend on utility functions defined in an external module called \texttt{oopp} (\texttt{oopp} is an obvious abbreviation for the title of the tutorial).
The module \texttt{oopp} is automatically generated by ``test.py'', which works by extracting from the tutorial text blocks of code of the form \texttt{{\#}<oopp.py> something {\#}</oopp.py>} and saving them in a file called ``oopp.py''.
Let me give an example.
A very recent enhancement to Python (in Python 2.3) has been the addition of a built-in boolean type with values True and False:
\begin{quote}
\begin{ttfamily}\begin{flushleft}
\mbox{{\$}~python}\\
\mbox{Python~2.3a1~({\#}1,~Jan~~6~2003,~10:31:14)}\\
\mbox{[GCC~2.96~20000731~(Red~Hat~Linux~7.2~2.96-108.7.2)]~on~linux2}\\
\mbox{Type~"help",~"copyright",~"credits"~or~"license"~for~more~information.}\\
\mbox{>>>~1+1==2}\\
\mbox{True}\\
\mbox{>>>~1+1==3}\\
\mbox{False}\\
\mbox{>>>~type(True)}\\
\mbox{<type~'bool'>}\\
\mbox{>>>~type(False)}\\
\mbox{<type~'bool'>}
\end{flushleft}\end{ttfamily}
\end{quote}
However, previous versions of Python use the integers 1 and 0 for True and False respectively.
\begin{quote}
\begin{ttfamily}\begin{flushleft}
\mbox{{\$}~python}\\
\mbox{Python~2.2~({\#}1,~Apr~12~2002,~15:29:57)}\\
\mbox{[GCC~2.96~20000731~(Red~Hat~Linux~7.2~2.96-109)]~on~linux2}\\
\mbox{Type~"help",~"copyright",~"credits"~or~"license"~for~more~information.}\\
\mbox{>>>~1+1==2}\\
\mbox{1}\\
\mbox{>>>~1+1==3~}\\
\mbox{0}
\end{flushleft}\end{ttfamily}
\end{quote}
Following the 2.3 convention, in this tutorial I will use the names \texttt{True} and \texttt{False} to denote the numbers 1 and 0 respectively.
This is automatic in Python 2.2.1+, but not in Python 2.2.
Therefore, for the sake of compatibility, it is convenient to set the values \texttt{True} and \texttt{False} in our utility module:
\begin{quote}
\begin{ttfamily}\begin{flushleft}
\mbox{{\#}<oopp.py>}\\
\mbox{}\\
\mbox{import~{\_}{\_}builtin{\_}{\_}}\\
\mbox{try:~}\\
\mbox{~~~~{\_}{\_}builtin{\_}{\_}.True~~~{\#}look~if~True~is~already~defined}\\
\mbox{except~AttributeError:~{\#}~if~not~add~True~and~False~to~the~builtins}\\
\mbox{~~~~{\_}{\_}builtin{\_}{\_}.True~=~1}\\
\mbox{~~~~{\_}{\_}builtin{\_}{\_}.False~=~0}\\
\mbox{}\\
\mbox{{\#}</oopp.py>}
\end{flushleft}\end{ttfamily}
\end{quote}
Here is an example of usage:
% NOTE(review): the script-name tags that follow the `#' markers of the next
% block were lost from the source; restore them from the original text.
\begin{quote}
\begin{ttfamily}\begin{flushleft}
\mbox{{\#}}\\
\mbox{}\\
\mbox{import~oopp}\\
\mbox{print~"True~=",True,}\\
\mbox{print~"False~=",False}\\
\mbox{}\\
\mbox{{\#}}
\end{flushleft}\end{ttfamily}
\end{quote}
The output is ``True = 1 False = 0'' under Python 2.2 and ``True = True False = False'' under Python 2.3+.
\begin{figure}[b]\hypertarget{id4}[2]
``test.py'', invoked without arguments, does not create `.py' files, since I don't want to clutter the distribution with dozens of ten-line scripts.
I expect you may want to save only a few scripts as standalone programs, and cut and paste the others.
\end{figure}

%___________________________________________________________________________

\hypertarget{conventions-used-in-this-book}{}
\pdfbookmark[1]{Conventions used in this book}{conventions-used-in-this-book}
\subsection*{Conventions used in this book}
Python expressions are denoted with monospaced fonts when in the text.
Sections marked with an asterisk can be skipped in a first reading.
Typically they have the purpose of clarifying some subtle point and are not needed for the rest of the book.
These sections are intended for the advanced reader, but could confuse the beginner.
An example is the section about the difference between methods and functions, or the difference between the inheritance constraint and the metaclass constraint.

%___________________________________________________________________________

\hypertarget{introduction}{}
\pdfbookmark[0]{Introduction}{introduction}
\section*{Introduction}
\begin{quote}
\begin{flushleft}
\emph{A~language~that~doesn't~affect~the~way~you~think~about~programming,~\\
is~not~worth~knowing.}~--~Alan~Perlis
\end{flushleft}
\end{quote}

%___________________________________________________________________________

\hypertarget{why-oop}{}
\pdfbookmark[1]{Why OOP ?}{why-oop}
\subsection*{Why OOP ?}
I guess some of my readers, like me, started programming in the mid-80's, when traditional (i.e. non object-oriented) Basic and Pascal were popular as first languages.
At the time OOP was not as pervasive in software development as it is now, most of the mainstream languages were non-object-oriented and C++ was just being released.
That was a time when the transition from spaghetti-code to structured code was already well accomplished, but the transition from structured programming to (the first phase of) OOP was at the beginning.
Nowadays, we live in a similar time of transition.
Today, the transition to (the first phase of) OOP is well accomplished and essentially all mainstream languages support some elementary form of OOP.
To be clear, when I say mainstream languages, I have in mind Java and C++: C is a remarkable exception to the rule, since it is mainstream but not object-oriented.
However, both Java and C++ (I mean standard Java and C++, not special extensions like DTS C++, that have quite powerful object oriented features) are quite poor object-oriented languages: they provide only the most elementary aspects of OOP, the features of the \emph{first phase} of OOP.
Hence, today the transition to the \emph{second phase} of OOP is only at the beginning, i.e. mainstream languages are not yet really OO, but they will become OO in the near future.
By second phase of OOP I mean the phase in which the primary objects of concern for the programmer are no longer the objects, but the metaobjects.
In elementary OOP one works on objects, which have attributes and methods (the evolution of old-fashioned data and functions) defined by their classes; in the second phase of OOP one works on classes whose behavior is described by metaclasses.
We no longer modify objects through classes: nowadays we modify classes and class hierarchies through metaclasses and multiple inheritance.
It would be tempting to represent the history of programming in the last quarter of a century with an evolutionary table like this:
\begin{longtable}[c]{|p{0.30\locallinewidth}|p{0.25\locallinewidth}|p{0.27\locallinewidth}|p{0.10\locallinewidth}|}
\hline
\textbf{ {\textasciitilde}1975 } & \textbf{ {\textasciitilde}1985 } & \textbf{ {\textasciitilde}1995 } & \textbf{ {\textasciitilde}2005 } \\
\hline
\endhead
%[visit_tbody]
procedural programming & OOP1 & OOP2 & ? \\
\hline
data, functions & objects, classes & classes, metaclasses & ? \\
\hline
%[depart_tbody]
\end{longtable}
The problem is that this table would be simply wrong, since in truth Smalltalk had metaclasses already 25 years ago!
And also Lisp had \emph{in nuce} everything a long \emph{long} time ago.
The truth is that certain languages were too far ahead of their time ;-)
Therefore, today we already have all the ideas and the conceptual tools to go beyond the first phase of OOP (they were invented 20-30 years ago), nevertheless those ideas are not yet universally known, nor implemented in mainstream languages.
Fortunately, there are good languages where you can access the bonus of the second phase of OOP (Smalltalk, CLOS, Dylan, ...): unfortunately most of them are academic and/or little known in the real world (often for purely commercial reasons, since typically languages are not chosen according to their merits, alas!).
Python is an exception to this rule, in the sense that it is an eminently practical language (it started as a scripting language to do Operating System administrative jobs), which is relatively well known and used in that application niche (even if some people \emph{wrongly} think that it should not be used for `serious' things).
There are various reasons why most mainstream languages are rather poor languages, i.e. underfeatured languages (as Java) or powerful, but too tricky to use, as C++.
Some are good reasons (for instance \emph{efficiency}: if efficiency is the first concern, then poor languages can be much better suited to the goal: for instance Fortran for number crunching and C for system programming), some are less good (economical monopoly).
There is nothing to be done about these reasons if you need efficiency, or if you are forced to use a proprietary language because it is the language used by your employer.
However, if you are free from these restrictions, there is another reason why you might choose not to use a powerful language.
The reason is that, till now, programmers working in the industrial world mostly had simple problems (I mean conceptually simple problems).
In order to solve simple problems one does not need a powerful language, and the effort spent in learning it is not worth it.
However, nowadays the situation has changed.
Now, with Internet and graphics programming everywhere, and object-oriented languages so widespread, it is the time when people actually \emph{need} metaprogramming, the ability to change classes and programs.
Now everybody is programming in the large.
In this situation, it is justified to spend some time to learn better ways of programming.
And of course, it is convenient to start from the language with the flattest learning curve of all.

%___________________________________________________________________________

\hypertarget{why-python}{}
\pdfbookmark[1]{Why Python ?}{why-python}
\subsection*{Why Python ?}
\begin{quote}
\begin{flushleft}
\emph{In~many~ways,~it's~a~dull~language,~borrowing~solid~old~concepts~from~~\\
many~other~languages~{\&}~styles:~~boring~syntax,~unsurprising~semantics,~\\
few~automatic~coercions,~etc~etc.~~But~that's~one~of~the~things~I~like~\\
about~it.}~~--Tim~Peters~on~Python,~16~Sep~93
\end{flushleft}
\end{quote}
If you are reading this book, I assume you already have some experience with Python.
If this is the case, you already know the obvious advantages of Python such as readability, ease of use and short development time.
Nevertheless, you could only have used Python as a fast and simple scripting language.
If you are in this situation, then you risk having an incorrect opinion on the language like ``it is a nice little language, but too simple to be useful in `real' applications''.
The truth is that Python is designed to be \emph{simple}, and actually it is; but it is by no means a ``shallow'' language.
Actually, it goes quite \emph{deep}, but it takes some time to appreciate this fact.
Let me contrast Python with Lisp, for instance.
From the beginning, Lisp was intended to be a language for experts, for people with difficult problems to solve.
The first users of Lisp were academicians, professors of CS and scientists.
On the contrary, from the beginning Python was intended to be a language for everybody (Python's predecessor was ABC, a language invented to teach CS to children).
Python makes a great first language for everybody, whereas Lisp would require especially clever and motivated students (and we all know that there is a lack of them ;-). From this difference of origins, Python inherits an easy to learn syntax, whereas Lisp syntax is horrible for the beginner (even if not as horrible as C++ syntax ;-).
\begin{quote}
\begin{flushleft}
\emph{Macros~are~a~powerful~extension~to~weak~languages.~\\
Powerful~languages~don't~need~macros~by~definition.}~~\\
--~Christian~Tismer~on~c.l.p.~(referring~to~C)
\end{flushleft}
\end{quote}
Despite the differences, Python borrows quite a lot from Lisp and it is nearly as expressive (I say nearly since Python is not as powerful as Lisp: by tradition, Lisp has always been at the top of the hierarchy of programming languages with respect to power of abstraction). It is true that Python lacks some powerful Lisp features: for instance the Python object model lacks multiple dispatching (for the time being ;-) and the language lacks Lisp macros (but this is unlikely to change in the near future since Pythonistas see the lack of macros as a Good Thing [\hyperlink{id6}{3}]): nevertheless, the point is that Python is much \emph{much} easier to learn. You have (nearly) all the power, but without the complexity. One of the reasons is that Python tries to be as \emph{little} innovative as possible: it takes the proven good things from other, more innovative languages, and avoids their pitfalls. If you are an experienced programmer, it will be even easier for you to learn Python, since there is more or less nothing which is really original to Python. For instance:
\newcounter{listcnt3}
\begin{list}{\arabic{listcnt3}.}
{
\usecounter{listcnt3}
\setlength{\rightmargin}{\leftmargin}
}
\item {} the object model is taken from languages that are good at it, such as Smalltalk;
\item {} multiple inheritance has been modeled on languages that are good at it,
such as CLOS and Dylan;
\item {} regular expressions follow the road opened by Perl;
\item {} functional features are borrowed from functional languages;
\item {} the idea of documentation strings comes from Lisp;
\item {} list comprehensions come from Haskell;
\item {} iterators and generators come from Icon;
\item {} etc. etc. (many other points here)
\end{list}
I think the really distinctive feature of Python with respect to any other serious language I know is that Python is \emph{easy}. You have the power (I mean power in the conceptual sense, not computational power: in the sense of computational power the best languages are non-object-oriented ones) of the most powerful languages with very little investment. In addition to that, Python has a relatively large user base (as compared to Smalltalk or Ruby, or the various fragmented Lisp communities). Of course, there is quite a difference between the user base of Python and the user base of, let's say, VisualBasic or Perl. But I would never take into consideration VisualBasic for anything serious, whereas Perl is too ugly for my taste ;-). Finally, Python is \emph{practical}. With this I mean the fact that Python has libraries that allow the user to do nearly everything, since you can access all the C/C++ libraries with little or no effort, and all the Java libraries, through the Python implementation known as Jython. In particular, one has the choice between many excellent GUIs through PyQt, wxPython, Tkinter, etc. Python has been an Object Oriented Programming Language from the beginning; nevertheless it was never intended to be a \emph{pure} OOPL such as Smalltalk or, more recently, Ruby. Python is a \emph{multiparadigm} language such as Lisp, in which you choose your programming style according to your problem: spaghetti-code, structured programming, functional programming, object-oriented programming are all supported.
You can even write bad code in Python, even if it is less simple than in other languages ;-). Python is a language which has evolved quite a lot in its twelve years of life (the first public release was in February 1991) and many new features have been integrated in the language with time. In particular, Python 2.2 (released in 2002) was a major breakthrough in the history of the language as far as support for Object Oriented Programming (OOP) is concerned. Before the 2.2 revolution, Python Object Orientation was good; now it is \emph{excellent}. All the fundamental features of OOP, including pretty sophisticated ones, such as metaclasses and multiple inheritance, now have very good support (the only missing thing is multiple dispatching).
\begin{figure}[b]\hypertarget{id6}[3]
Python lacks macros by an intentional design choice: many people in the community (including Guido himself) feel that macros are ``too powerful''. If you give the user the freedom to create her own language, you must face at least three problems: i) the risk of splitting the original language into dozens of different dialects; ii) in collaborative projects, the individual programmer must spend a huge amount of time and effort in learning macro systems written by others; iii) not all users are good language designers: the programmer will have to fight with badly designed macro systems. Due to these problems, it seems unlikely that macros will be added to Python in the future.
\end{figure}
\begin{figure}[b]\hypertarget{id7}[4]
For a good comparison between Python and Lisp I refer the reader to Peter Norvig's excellent article at \href{http://www.norvig.com/python-lisp.html}{http://www.norvig.com/python-lisp.html}
\end{figure}

%___________________________________________________________________________

\hypertarget{further-thoughts}{}
\pdfbookmark[1]{Further thoughts}{further-thoughts}
\subsection*{Further thoughts}
Actually, the principal reasons why I began studying Python were the documentation and the newsgroup: Python has outstanding freely available documentation and an incredibly helpful newsgroup that make it extremely easy to learn the language. If I had found comparable free documentation/newsgroups for C++ or Lisp, I would have studied those languages instead. Unfortunately, the enormous development at the software level was not matched by an appropriate development of the documentation. As a consequence, the many beautiful, powerful and extremely \emph{useful} new features of Python 2.2+ object orientation have mostly remained confined to developers and power users: the average Python programmer has remained a little apart from the rapid development and she \emph{wrongly} thinks she has no use for the new features. There have also been \emph{protestations} of the users against developers of the kind ``please, stop adding thousands of complicated new extensions to the language for which we have no use''! Extending a language is always a delicate thing to do, for a whole bunch of reasons:
\newcounter{listcnt4}
\begin{list}{\arabic{listcnt4}.}
{
\usecounter{listcnt4}
\setlength{\rightmargin}{\leftmargin}
}
\item {} once one extension is done, it is there \emph{forever}.
\end{list}
My experience has been the following. When I first read about metaclasses, in Guido's essay ``Unifying types and classes in Python 2.2'', I thought ``Wow, classes of classes, cool concept, but how useful is it?
Are metaclasses really providing some new functionality? What can I do with metaclasses that I cannot do without?'' Clearly, in these terms, the question is rather rhetorical, since in principle any Turing-complete programming language contains all the features provided by metaclasses. Python metaclasses themselves are implemented in C, which has no metaclasses. Therefore, my real question was not ``What can I do with metaclasses that I cannot do without?'' but ``How big is the convenience provided by metaclasses, with respect to my typical applications?''. The answer depends on the kind of problem you are considering. For certain classes of problems it can be \emph{very} large, as I will show in this and in the next chapters. I think the biggest advantage of metaclasses is \emph{elegance}. Although it is true that most of what you can do with metaclasses can be done without metaclasses, not using metaclasses can result in a much \emph{uglier} solution. One needs difficult problems in order to appreciate the advantage of powerful methods. If all you need is to write a few scripts for copying two or three files, there is no point in learning OOP. On the other hand, if you only write simple programs where you define only one or two classes, there is no point in using metaclasses. Metaclasses become relevant only when you have many classes, whole classes of classes with similar features that you want to modify. In this sense, metaprogramming is for experts only, i.e. for people with difficult problems. The point, however, is that nowadays many people have difficult problems. Finally, let me conclude this preface by recalling the gist of Python wisdom.
\begin{quote}
\begin{verbatim}>>> import this
The Zen of Python, by Tim Peters
.
Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!\end{verbatim}
\end{quote}

%___________________________________________________________________________

\hypertarget{first-things-first}{}
\pdfbookmark[0]{FIRST THINGS, FIRST}{first-things-first}
\section*{FIRST THINGS, FIRST}
This is an introductory chapter, with the main purpose of fixing the terminology used in the sequel. In particular, I give the definitions of objects, classes, attributes and methods. I discuss a few examples and I show some of the most elementary Python introspection features.

%___________________________________________________________________________

\hypertarget{what-s-an-object}{}
\pdfbookmark[1]{What's an object?}{what-s-an-object}
\subsection*{What's an object?}
\begin{quote}
\begin{flushleft}
\emph{So~Everything~Is~An~object.~~~\\
I'm~sure~the~Smalltalkers~are~very~happy~:)}~\\
~\\
--~Michael~Hudson~on~comp.lang.python
\end{flushleft}
\end{quote}
``What's an object'' is the obvious question raised by anybody starting to learn Object Oriented Programming. The answer is simple: in Python, everything is an object! An operative definition is the following: an \emph{object} is anything that can be labelled with an \emph{object reference}. In practical terms, the object reference is implemented as the object memory address, that is an integer number which uniquely specifies the object.
There is a simple way to retrieve the object reference: to use the built-in \texttt{id} function. Information on \texttt{id} can be retrieved via the \texttt{help} function [\hyperlink{id9}{5}]:
\begin{quote}
\begin{verbatim}>>> help(id)
Help on built-in function id:
id(...)
id(object) -> integer
Return the identity of an object. This is guaranteed to be unique among
simultaneously existing objects. (Hint: it's the object's memory address.)\end{verbatim}
\end{quote}
The reader is strongly encouraged to try the help function on everything (including help(help) ;-). This is the best way to learn how Python works, even \emph{better} than reading the standard documentation, since the on-line help is often more up to date. Suppose for instance we wonder if the number \texttt{1} is an object: it is easy enough to ask Python for the answer:
\begin{quote}
\begin{verbatim}>>> id(1)
135383880\end{verbatim}
\end{quote}
Therefore the number 1 is a Python object and it is stored at the memory address 135383880, at least on my computer and during the current session. Notice that the object reference is a dynamic thing; nevertheless it is guaranteed to be unique and constant for a given object during its lifetime (two objects whose lifetimes are disjoint may have the same id() value, though).
Here are other examples of built-in objects:
\begin{quote}
\begin{verbatim}>>> id(1L) # long
1074483312
>>> id(1.0) #float
135682468
>>> id(1j) # complex
135623440
>>> id('1') #string
1074398272
>>> id([1]) #list
1074376588
>>> id((1,)) #tuple
1074348844
>>> id({1:1}) # dict
1074338100\end{verbatim}
\end{quote}
Even functions are objects:
\begin{quote}
\begin{verbatim}>>> def f(x): return x #user-defined function
>>> id(f)
1074292020
>>> g=lambda x: x #another way to define functions
>>> id(g)
1074292468
>>> id(id) #id itself is a built-in function
1074278668\end{verbatim}
\end{quote}
Modules are objects, too:
\begin{quote}
\begin{verbatim}>>> import math
>>> id(math) #module of the standard library
1074239068
>>> id(math.sqrt) #function of the standard library
1074469420\end{verbatim}
\end{quote}
\texttt{help} itself is an object:
\begin{quote}
\begin{verbatim}>>> id(help)
1074373452\end{verbatim}
\end{quote}
Finally, we may notice that the reserved keywords are not objects:
\begin{quote}
\begin{verbatim}>>> id(print) #error
  File "<stdin>", line 1
    id(print)
           ^
SyntaxError: invalid syntax\end{verbatim}
\end{quote}
The operative definition is convenient since it gives a practical way to check if something is an object and, more importantly, if two objects are the same or not:
\begin{quote}
% doctest
\begin{verbatim}>>> s1='spam'
>>> s2='spam'
>>> s1==s2
True
>>> id(s1)==id(s2)
True\end{verbatim}
\end{quote}
A more elegant way of spelling \texttt{id(obj1)==id(obj2)} is to use the keyword \texttt{is}:
\begin{quote}
\begin{verbatim}>>> s1 is s2
True\end{verbatim}
\end{quote}
However, I should warn the reader that sometimes \texttt{is} can be surprising:
\begin{quote}
\begin{verbatim}>>> id([]) == id([])
True
>>> [] is []
False\end{verbatim}
\end{quote}
This is happening because writing \texttt{id([])} dynamically creates a unique object (a list) which goes away when you're finished with it.
So when an expression needs both at the same time (\texttt{[] is []}), two unique objects are created, but when an expression doesn't need both at the same time (\texttt{id([]) == id([])}), an object gets created with an ID, is destroyed, and then a second object is created with the same ID (since the last one just got reclaimed) and their IDs compare equal. In other words, ``the ID is guaranteed to be unique \emph{only} among simultaneously existing objects''. Another surprise is the following:
\begin{quote}
\begin{verbatim}>>> a=1
>>> b=1
>>> a is b
True
>>> a=556
>>> b=556
>>> a is b
False\end{verbatim}
\end{quote}
The reason is that integers between 0 and 99 are pre-instantiated by the interpreter, whereas larger integers are recreated each time. Notice the difference between \texttt{==} and \texttt{is}:
\begin{quote}
\begin{verbatim}>>> 1L==1
True\end{verbatim}
\end{quote}
but
\begin{quote}
\begin{verbatim}>>> 1L is 1
False
\end{verbatim}
\end{quote}
since they are different objects:
\begin{quote}
\begin{verbatim}>>> id(1L) # long 1
135625536
>>> id(1) # int 1
135286080\end{verbatim}
\end{quote}
The disadvantage of the operative definition is that it gives little understanding of what an object can be used for. To this aim, I must introduce the concept of \emph{class}.
\begin{figure}[b]\hypertarget{id9}[5]
Actually \texttt{help} is not a function but a callable object. The difference will be discussed in a following chapter.
\end{figure}

%___________________________________________________________________________

\hypertarget{objects-and-classes}{}
\pdfbookmark[1]{Objects and classes}{objects-and-classes}
\subsection*{Objects and classes}
It is convenient to think of an object as an element of a set. If you think a bit, this is the most general definition that actually grasps what we mean by object in common language.
For instance, consider this book, ``Object Oriented Programming in Python'': this book is an object, in the sense that it is a specific representative of the \emph{class} of all possible books. According to this definition, objects are strictly related to classes, and actually we say that objects are \emph{instances} of classes. Classes are nested: for instance this book belongs to the class of books about programming languages, which is a subset of the class of all possible books; moreover we may further specify this book as a Python book; moreover we may specify this book as a Python 2.2+ book. There is no limit to the restrictions we may impose on our classes. On the other hand, it is convenient to have a ``mother'' class, such that any object belongs to it. All strongly Object Oriented Languages have such a class [\hyperlink{id12}{6}]; in Python it is called \emph{object}. The relation between objects and classes in Python can be investigated through the built-in function \texttt{type} [\hyperlink{id13}{7}] that gives the class of any Python object.
Let me give some examples:
\newcounter{listcnt5}
\begin{list}{\arabic{listcnt5}.}
{
\usecounter{listcnt5}
\setlength{\rightmargin}{\leftmargin}
}
\item {} Integer numbers are instances of the class \texttt{int} or \texttt{long}:
\end{list}
\begin{quote}
\begin{verbatim}>>> type(1)
<type 'int'>
>>> type(1L)
<type 'long'>\end{verbatim}
\end{quote}
\newcounter{listcnt6}
\begin{list}{\arabic{listcnt6}.}
{
\usecounter{listcnt6}
\addtocounter{listcnt6}{1}
\setlength{\rightmargin}{\leftmargin}
}
\item {} Floating point numbers are instances of the class \texttt{float}:
\end{list}
\begin{quote}
\begin{verbatim}>>> type(1.0)
<type 'float'>\end{verbatim}
\end{quote}
\newcounter{listcnt7}
\begin{list}{\arabic{listcnt7}.}
{
\usecounter{listcnt7}
\addtocounter{listcnt7}{2}
\setlength{\rightmargin}{\leftmargin}
}
\item {} Complex numbers are instances of the class \texttt{complex}:
\end{list}
\begin{quote}
\begin{verbatim}>>> type(1.0+1.0j)
<type 'complex'>\end{verbatim}
\end{quote}
\newcounter{listcnt8}
\begin{list}{\arabic{listcnt8}.}
{
\usecounter{listcnt8}
\addtocounter{listcnt8}{3}
\setlength{\rightmargin}{\leftmargin}
}
\item {} Strings are instances of the class \texttt{str}:
\end{list}
\begin{quote}
\begin{verbatim}>>> type('1')
<type 'str'>\end{verbatim}
\end{quote}
\newcounter{listcnt9}
\begin{list}{\arabic{listcnt9}.}
{
\usecounter{listcnt9}
\addtocounter{listcnt9}{4}
\setlength{\rightmargin}{\leftmargin}
}
\item {} Lists, tuples and dictionaries are instances of \texttt{list}, \texttt{tuple} and \texttt{dict} respectively:
\end{list}
\begin{quote}
\begin{verbatim}>>> type('1')
<type 'str'>
>>> type([1])
<type 'list'>
>>> type((1,))
<type 'tuple'>
>>> type({1:1})
<type 'dict'>\end{verbatim}
\end{quote}
\newcounter{listcnt10}
\begin{list}{\arabic{listcnt10}.}
{
\usecounter{listcnt10}
\addtocounter{listcnt10}{5}
\setlength{\rightmargin}{\leftmargin}
}
\item {} User defined functions are instances of the \texttt{function} built-in type:
\end{list}
\begin{quote}
\begin{verbatim}>>> type(f)
<type 'function'>
>>> type(g)
<type 'function'>\end{verbatim}
\end{quote}
All the previous types are subclasses of object:
\begin{quote}
\begin{verbatim}>>> for cl in int,long,float,str,list,tuple,dict: issubclass(cl,object)
True
True
True
True
True
True
True\end{verbatim}
\end{quote}
However, Python is not a 100{\%} pure Object Oriented Programming language and its object model still has some minor warts, due to historical accidents. Paraphrasing George Orwell, we may say that in Python 2.2-2.3, all objects are equal, but some objects are more equal than others. Actually, we may divide Python objects into new style objects, or rich man objects, and old style objects, or poor man objects. New style objects are instances of new style classes whereas old style objects are instances of old style classes. The difference is that new style classes are subclasses of object whereas old style classes are not. Old style classes are there for the sake of compatibility with previous releases of Python, but starting from Python 2.2 practically all built-in classes are new style classes. Instances of old style classes are called old style objects. I will give a few examples of old style objects in the future. In this tutorial with the term object \emph{tout court} we will mean new style objects, unless the contrary is explicitly stated.
\begin{figure}[b]\hypertarget{id12}[6]
One may notice that C++ does not have such a class, but C++ is \emph{not} a strongly object oriented language ;-)
\end{figure}
\begin{figure}[b]\hypertarget{id13}[7]
Actually \texttt{type} is not a function, but a metaclass; nevertheless, since this is an advanced concept, discussed in the fourth chapter, for the time being it is better to think of \texttt{type} as a built-in function analogous to \texttt{id}.
\end{figure}

%___________________________________________________________________________

\hypertarget{objects-have-attributes}{}
\pdfbookmark[1]{Objects have attributes}{objects-have-attributes}
\subsection*{Objects have attributes}
All objects have attributes describing their characteristics, that may be accessed via the dot notation
\begin{quote}
\begin{ttfamily}\begin{flushleft}
\mbox{objectname.objectattribute}
\end{flushleft}\end{ttfamily}
\end{quote}
The dot notation is common to most Object Oriented programming languages, therefore the reader with a little experience should find it not surprising at all (Python strongly believes in the Principle of Least Surprise). However, Python objects also have special attributes denoted by the double-double underscore notation
\begin{quote}
\begin{ttfamily}\begin{flushleft}
\mbox{objectname.{\_}{\_}specialattribute{\_}{\_}}
\end{flushleft}\end{ttfamily}
\end{quote}
with the aim of helping the wonderful Python introspection features, which do not have a counterpart in all OOP languages. Consider for example the string literal ``spam''. We may discover its class by looking at its special attribute \emph{{\_}{\_}class{\_}{\_}}:
\begin{quote}
\begin{verbatim}>>> 'spam'.__class__
<type 'str'>\end{verbatim}
\end{quote}
Using the \texttt{{\_}{\_}class{\_}{\_}} attribute is not always equivalent to using the \texttt{type} function, but it works for all built-in types. Consider for instance the number \emph{1}: we may extract its class as follows:
\begin{quote}
\begin{verbatim}>>> (1).__class__
<type 'int'>\end{verbatim}
\end{quote}
Notice that the parentheses are needed to avoid confusion between the integer 1 and the float (1.). The non-equivalence type/class is the key to distinguishing new style objects from old style, since for old style objects \texttt{type(obj)<>obj.{\_}{\_}class{\_}{\_}}. We may use this knowledge to make a utility function that discovers if an object is a ``real'' object (i.e.
new style) or a poor man object:
\begin{quote}
\begin{ttfamily}\begin{flushleft}
\mbox{{\#}}\\
\mbox{}\\
\mbox{def~isnewstyle(obj):}\\
\mbox{~~~~try:~{\#}some~objects~may~lack~a~{\_}{\_}class{\_}{\_}~attribute~}\\
\mbox{~~~~~~~~obj.{\_}{\_}class{\_}{\_}}\\
\mbox{~~~~except~AttributeError:}\\
\mbox{~~~~~~~~return~False}\\
\mbox{~~~~else:~{\#}look~if~there~is~unification~type/class}\\
\mbox{~~~~~~~~return~type(obj)~is~obj.{\_}{\_}class{\_}{\_}}\\
\mbox{{\#}}
\end{flushleft}\end{ttfamily}
\end{quote}
Let us check this with various examples:
\begin{quote}
\begin{verbatim}>>> from oopp import isnewstyle
>>> isnewstyle(1)
True
>>> isnewstyle(lambda x:x)
True
>>> isnewstyle(id)
True
>>> isnewstyle(type)
True
>>> isnewstyle(isnewstyle)
True
>>> import math
>>> isnewstyle(math)
True
>>> isnewstyle(math.sqrt)
True
>>> isnewstyle('hello')
True\end{verbatim}
\end{quote}
It is not easy to find something which is not a real object among the built-in objects; however it is possible. For instance, the \texttt{help} ``function'' is an old style object:
\begin{quote}
\begin{verbatim}>>> isnewstyle(help)
False\end{verbatim}
\end{quote}
since
\begin{quote}
\begin{verbatim}>>> help.__class__
<class site._Helper at 0x...>\end{verbatim}
\end{quote}
is different from
\begin{quote}
\begin{verbatim}>>> type(help)
<type 'instance'>\end{verbatim}
\end{quote}
Regular expression objects are even poorer objects with no \texttt{{\_}{\_}class{\_}{\_}} attribute:
\begin{quote}
\begin{verbatim}>>> import re
>>> reobj=re.compile('somestring')
>>> isnewstyle(reobj)
False
>>> type(reobj)
>>> reobj.__class__ #error
Traceback (most recent call last):
  File "<stdin>", line 1, in ?
AttributeError: __class__\end{verbatim}
\end{quote}
There are special attributes other than \texttt{{\_}{\_}class{\_}{\_}}; a particularly useful one is \texttt{{\_}{\_}doc{\_}{\_}}, that contains information on the class it refers to.
Consider for instance the \texttt{str} class: by looking at its \texttt{{\_}{\_}doc{\_}{\_}} attribute we can get information on the usage of this class:
\begin{quote}
\begin{verbatim}>>> str.__doc__
str(object) -> string
Return a nice string representation of the object.
If the argument is a string, the return value is the same object.\end{verbatim}
\end{quote}
From that docstring we learn how to convert generic objects into strings; for instance we may convert numbers, lists, tuples and dictionaries:
\begin{quote}
\begin{verbatim}>>> str(1)
'1'
>>> str([1])
'[1]'
>>> str((1,))
'(1,)'
>>> str({1:1})
'{1: 1}'\end{verbatim}
\end{quote}
\texttt{str} is implicitly called each time we use the \texttt{print} statement, since \texttt{print obj} is actually syntactic sugar for \texttt{print str(obj)}. Classes and modules have another interesting special attribute, the \texttt{{\_}{\_}dict{\_}{\_}} attribute that gives the content of the class/module. For instance, the contents of the standard \texttt{math} module can be retrieved as follows:
\begin{quote}
\begin{verbatim}>>> import math
>>> for key in math.__dict__: print key,
...
fmod atan pow __file__ cosh ldexp hypot sinh __name__ tan ceil asin cos
e log fabs floor tanh sqrt __doc__ frexp atan2 modf exp acos pi log10 sin\end{verbatim}
\end{quote}
Alternatively, one can use the built-in function \texttt{vars}:
\begin{quote}
\begin{verbatim}>>> vars(math) is math.__dict__
True\end{verbatim}
\end{quote}
This identity is true for any object with a \texttt{{\_}{\_}dict{\_}{\_}} attribute. Two other interesting special attributes are \texttt{{\_}{\_}doc{\_}{\_}}
\begin{quote}
\begin{verbatim}>>> print math.__doc__
This module is always available. It provides access to the
mathematical functions defined by the C standard.
\end{verbatim}
\end{quote}
and \texttt{{\_}{\_}file{\_}{\_}}:
\begin{quote}
\begin{verbatim}>>> math.__file__ #gives the file associated with the module
'/usr/lib/python2.2/lib-dynload/mathmodule.so'\end{verbatim}
\end{quote}

%___________________________________________________________________________

\hypertarget{objects-have-methods}{}
\pdfbookmark[1]{Objects have methods}{objects-have-methods}
\subsection*{Objects have methods}
In addition to attributes, objects also have \emph{methods}, i.e. functions attached to their classes [\hyperlink{id15}{8}]. Methods are also invoked with the dot notation, but they can be distinguished from attributes because they are typically called with parentheses (this is a little simplistic, but it is enough for an introductory chapter). As a simple example, let me show the invocation of the \texttt{split} method for a string object:
\begin{quote}
\begin{verbatim}>>> s='hello world!'
>>> s.split()
['hello', 'world!']\end{verbatim}
\end{quote}
In this example \texttt{s.split} is called a \emph{bound method}, since it is applied to the string object \texttt{s}:
\begin{quote}
\begin{verbatim}>>> s.split
<built-in method split of str object at 0x...>\end{verbatim}
\end{quote}
An \emph{unbound method}, instead, is applied to the class: in this case the unbound version of \texttt{split} is applied to the \texttt{str} class:
\begin{quote}
\begin{verbatim}>>> str.split
<method 'split' of 'str' objects>\end{verbatim}
\end{quote}
A bound method is obtained from its corresponding unbound method by providing the object to the unbound method: for instance by providing \texttt{s} to \texttt{str.split} we obtain the same effect as \titlereference{s.split()}:
\begin{quote}
\begin{verbatim}>>> str.split(s)
['hello', 'world!']\end{verbatim}
\end{quote}
This operation is called \emph{binding} in the Python literature: when we write \texttt{str.split(s)} we bind the unbound method \texttt{str.split} to the object \texttt{s}.
It is interesting to recognize that the bound and unbound methods are \emph{different} objects:
\begin{quote}
\begin{verbatim}>>> id(str.split) # unbound method reference
135414364
>>> id(s.split) # this is a different object!
135611408\end{verbatim}
\end{quote}
The unbound method (and therefore the bound method) has a \texttt{{\_}{\_}doc{\_}{\_}} attribute explaining how it works:
\begin{quote}
\begin{verbatim}>>> print str.split.__doc__
S.split([sep [,maxsplit]]) -> list of strings
Return a list of the words in the string S, using sep as the
delimiter string. If maxsplit is given, at most maxsplit
splits are done. If sep is not specified or is None, any
whitespace string is a separator.\end{verbatim}
\end{quote}
\begin{figure}[b]\hypertarget{id15}[8]
A precise definition will be given in chapter 5 that introduces the concept of attribute descriptors. There are subtle differences between functions and methods.
\end{figure}

%___________________________________________________________________________

\hypertarget{summing-objects}{}
\pdfbookmark[1]{Summing objects}{summing-objects}
\subsection*{Summing objects}
In a pure object-oriented world, there are no functions and everything is done through methods. Python is not a pure OOP language, however quite a lot is done through methods. For instance, it is quite interesting to analyze what happens when an apparently trivial statement such as
\begin{quote}
\begin{verbatim}>>> 1+1
2\end{verbatim}
\end{quote}
is executed in an object-oriented world. The key to understanding is to notice that the number 1 is an object, specifically an instance of class \texttt{int}: this means that 1 inherits all the methods of the \texttt{int} class.
In particular it inherits a special method called \texttt{{\_}{\_}add{\_}{\_}}: this means 1+1 is actually syntactic sugar for
\begin{quote}
\begin{verbatim}>>> (1).__add__(1)
2\end{verbatim}
\end{quote}
which in turn is syntactic sugar for
\begin{quote}
\begin{verbatim}>>> int.__add__(1,1)
2\end{verbatim}
\end{quote}
The same is true for subtraction, multiplication, division and other binary operations.
\begin{quote}
\begin{verbatim}>>> 'hello'*2
'hellohello'
>>> (2).__mul__('hello')
'hellohello'
>>> str.__mul__('hello',2)
'hellohello'\end{verbatim}
\end{quote}
However, notice that
\begin{quote}
\begin{verbatim}>>> str.__mul__(2,'hello') #error
Traceback (most recent call last):
  File "<stdin>", line 1, in ?
TypeError: descriptor '__mul__' requires a 'str' object but received a 'int'\end{verbatim}
\end{quote}
The fact that operators are implemented as methods is the key to \emph{operator overloading}: in Python (as well as in other OOP languages) the user can redefine the operators. This is already done by default for some operators: for instance the operator \texttt{+} is overloaded and works for integers, floats, complex numbers and strings.

%___________________________________________________________________________

\hypertarget{inspecting-objects}{}
\pdfbookmark[1]{Inspecting objects}{inspecting-objects}
\subsection*{Inspecting objects}
In Python it is possible to retrieve most of the attributes and methods of an object by using the built-in function \texttt{dir()} (try \texttt{help(dir)} for more information).
Let me consider the simplest case of a generic object:
\begin{quote}
\begin{verbatim}>>> obj=object()
>>> dir(obj)
['__class__', '__delattr__', '__doc__', '__getattribute__', '__hash__',
'__init__', '__new__', '__reduce__', '__repr__', '__setattr__', '__str__']\end{verbatim}
\end{quote}
As we see, there are plenty of attributes available even to a do-nothing object; many of them are special attributes providing introspection capabilities which are not common to all programming languages. We have already discussed the meaning of some of the more obvious special attributes. The meaning of some of the others is quite non-obvious, however. The docstring is invaluable in providing some clue. Notice that there are special \emph{hidden} attributes that cannot be retrieved with \texttt{dir()}. Examples are the \texttt{{\_}{\_}name{\_}{\_}} attribute, returning the name of the object (defined for classes, modules and functions) and the \texttt{{\_}{\_}subclasses{\_}{\_}} method, defined for classes and returning the list of immediate subclasses of a class:
\begin{quote}
\begin{verbatim}>>> str.__name__
'str'
>>> str.__subclasses__.__doc__
'__subclasses__() -> list of immediate subclasses'
>>> str.__subclasses__() # no subclasses of 'str' are currently defined
[]\end{verbatim}
\end{quote}
For instance by doing
\begin{quote}
\begin{verbatim}>>> obj.__getattribute__.__doc__
"x.__getattribute__('name') <==> x.name"\end{verbatim}
\end{quote}
we discover that the expression \texttt{x.name} is syntactic sugar for
\begin{quote}
\texttt{x.{\_}{\_}getattribute{\_}{\_}('name')}
\end{quote}
Another equivalent form which is more often used is
\begin{quote}
\texttt{getattr(x,'name')}
\end{quote}
We may use this trick to make a function that retrieves all the attributes of an object except the special ones:
\begin{quote}
\begin{ttfamily}\begin{flushleft}
\mbox{{\#}}\\
\mbox{}\\
\mbox{def~special(name):~return~name.startswith('{\_}{\_}')~and~name.endswith('{\_}{\_}')}\\
\mbox{}\\
\mbox{def~attributes(obj,condition=lambda~n,v:~not~special(n)):}\\ \mbox{~~~~"""Returns~a~dictionary~containing~the~accessible~attributes~of~}\\ \mbox{~~~~an~object.~By~default,~returns~the~non-special~attributes~only."""}\\ \mbox{~~~~dic={\{}{\}}}\\ \mbox{~~~~for~attr~in~dir(obj):}\\ \mbox{~~~~~~~~try:~v=getattr(obj,attr)}\\ \mbox{~~~~~~~~except:~continue~{\#}attr~is~not~accessible}\\ \mbox{~~~~~~~~if~condition(attr,v):~dic[attr]=v}\\ \mbox{~~~~return~dic}\\ \mbox{}\\ \mbox{getall~=~lambda~n,v:~True}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Notice that certain attributes may be unaccessible (we will see how to make attributes unaccessible in a following chapter) and in this case they are simply ignored. For instance you may retrieve the regular (i.e. non special) attributes of the built-in functions: \begin{quote} \begin{verbatim}>>> from oopp import attributes >>> attributes(f).keys() ['func_closure', 'func_dict', 'func_defaults', 'func_name', 'func_code', 'func_doc', 'func_globals']\end{verbatim} \end{quote} In the same vein of the \texttt{getattr} function, there is a built-in \texttt{setattr} function (that actually calls the \texttt{{\_}{\_}setattr{\_}{\_}} built-in method), that allows the user to change the attributes and methods of and object. 
Information on \texttt{setattr} can be retrieved from the help function: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{>>>~help(setattr)}\\ \mbox{Help~on~built-in~function~setattr:}\\ \mbox{setattr(...)}\\ \mbox{setattr(object,~name,~value)}\\ \mbox{Set~a~named~attribute~on~an~object;~setattr(x,~'y',~v)~is~equivalent~to}\\ \mbox{``x.y~=~v''.} \end{flushleft}\end{ttfamily} \end{quote} \texttt{setattr} can be used to add attributes to an object: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{import~sys}\\ \mbox{}\\ \mbox{def~customize(obj,errfile=None,**kw):}\\ \mbox{~~~~"""Adds~attributes~to~an~object,~if~possible.~If~not,~writes~an~error}\\ \mbox{~~~~message~on~'errfile'.~If~errfile~is~None,~skips~the~exception."""}\\ \mbox{~~~~for~k~in~kw:}\\ \mbox{~~~~~~~~try:~}\\ \mbox{~~~~~~~~~~~~setattr(obj,k,kw[k])}\\ \mbox{~~~~~~~~except:~{\#}~setting~error}\\ \mbox{~~~~~~~~~~~~if~errfile:}\\ \mbox{~~~~~~~~~~~~~~~~print~>>~errfile,"Error:~{\%}s~cannot~be~set"~{\%}~k}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} The attributes of built-in objects cannot be set, however: \begin{quote} \begin{verbatim}>>> from oopp import customize,sys >>> customize(object(),errfile=sys.stdout,newattr='hello!') #error AttributeError: newattr cannot be set\end{verbatim} \end{quote} On the other hand, the attributes of modules can be set: \begin{quote} \begin{verbatim}>>> import time >>> customize(time,newattr='hello!') >>> time.newattr 'hello!'\end{verbatim} \end{quote} Notice that this means we may enhance modules at run-time, by adding new routines, not only new data attributes. The \texttt{attributes} and \texttt{customize} functions work for any kind of object; in particular, since classes are a special kind of object, they work for classes, too.
Here are the attributes of the \texttt{str}, \texttt{list} and \texttt{dict} built-in types: \begin{quote} \begin{verbatim}>>> from oopp import attributes >>> attributes(str).keys() ['startswith', 'rjust', 'lstrip', 'swapcase', 'replace','encode', 'endswith', 'splitlines', 'rfind', 'strip', 'isdigit', 'ljust', 'capitalize', 'find', 'count', 'index', 'lower', 'translate','join', 'center', 'isalnum','title', 'rindex', 'expandtabs', 'isspace', 'decode', 'isalpha', 'split', 'rstrip', 'islower', 'isupper', 'istitle', 'upper'] >>> attributes(list).keys() ['append', 'count', 'extend', 'index', 'insert', 'pop', 'remove', 'reverse', 'sort'] >>> attributes(dict).keys() ['clear','copy','fromkeys', 'get', 'has_key', 'items','iteritems', 'iterkeys', 'itervalues', 'keys', 'pop', 'popitem', 'setdefault', 'update', 'values']\end{verbatim} \end{quote} Classes and modules have a special attribute \texttt{{\_}{\_}dict{\_}{\_}} giving the dictionary of their attributes. Since it is often a quite large dictionary, it is convenient to define an utility function printing this dictionary in a nice form: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~pretty(dic):}\\ \mbox{~~~~"Returns~a~nice~string~representation~for~the~dictionary"}\\ \mbox{~~~~keys=dic.keys();~keys.sort()~{\#}~sorts~the~keys}\\ \mbox{~~~~return~'{\textbackslash}n'.join(['{\%}s~=~{\%}s'~{\%}~(k,dic[k])~for~k~in~keys])}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} I encourage the use of this function in order to retrieve more information about the modules of the standard library: \begin{quote} \begin{verbatim}>>> from oopp import pretty >>> import time #look at the 'time' standard library module >>> print pretty(vars(time)) __doc__ = This module provides various functions to manipulate time values. There are two standard representations of time. One is the number of seconds since the Epoch, in UTC (a.k.a. GMT). 
It may be an integer or a floating point number (to represent fractions of seconds). The Epoch is system-defined; on Unix, it is generally January 1st, 1970. The actual value can be retrieved by calling gmtime(0). The other representation is a tuple of 9 integers giving local time. The tuple items are: year (four digits, e.g. 1998) month (1-12) day (1-31) hours (0-23) minutes (0-59) seconds (0-59) weekday (0-6, Monday is 0) Julian day (day in the year, 1-366) DST (Daylight Savings Time) flag (-1, 0 or 1) If the DST flag is 0, the time is given in the regular time zone; if it is 1, the time is given in the DST time zone; if it is -1, mktime() should guess based on the date and time. Variables: timezone -- difference in seconds between UTC and local standard time altzone -- difference in seconds between UTC and local DST time daylight -- whether local time should reflect DST tzname -- tuple of (standard time zone name, DST time zone name) Functions: time() -- return current time in seconds since the Epoch as a float clock() -- return CPU time since process start as a float sleep() -- delay for a number of seconds given as a float gmtime() -- convert seconds since Epoch to UTC tuple localtime() -- convert seconds since Epoch to local time tuple asctime() -- convert time tuple to string ctime() -- convert time in seconds to string mktime() -- convert local time tuple to seconds since Epoch strftime() -- convert time tuple to string according to format specification strptime() -- parse string to time tuple according to format specification __file__ = /usr/local/lib/python2.3/lib-dynload/time.so __name__ = time accept2dyear = 1 altzone = 14400 asctime = clock = ctime = daylight = 1 gmtime = localtime = mktime = newattr = hello! 
sleep = strftime = strptime = struct_time = time = timezone = 18000 tzname = ('EST', 'EDT')\end{verbatim} \end{quote} The list of the built-in Python types can be found in the \texttt{types} module: \begin{quote} \begin{verbatim}>>> import types >>> t_dict=dict([(k,v) for (k,v) in vars(types).iteritems() ... if k.endswith('Type')]) >>> for t in t_dict: print t, ... DictType IntType TypeType FileType CodeType XRangeType EllipsisType SliceType BooleanType ListType MethodType TupleType ModuleType FrameType StringType LongType BuiltinMethodType BufferType FloatType ClassType DictionaryType BuiltinFunctionType UnboundMethodType UnicodeType LambdaType DictProxyType ComplexType GeneratorType ObjectType FunctionType InstanceType NoneType TracebackType\end{verbatim} \end{quote} For a pedagogical account of the most elementary Python introspection features, Patrick O' Brien: \href{http://www-106.ibm.com/developerworks/linux/library/l-pyint.html}{http://www-106.ibm.com/developerworks/linux/library/l-pyint.html} %___________________________________________________________________________ \hypertarget{built-in-objects-iterators-and-generators}{} \pdfbookmark[1]{Built-in objects: iterators and generators}{built-in-objects-iterators-and-generators} \subsection*{Built-in objects: iterators and generators} At the end of the last section , I have used the \texttt{iteritems} method of the dictionary, which returns an iterator: \begin{quote} \begin{verbatim}>>> dict.iteritems.__doc__ 'D.iteritems() -> an iterator over the (key, value) items of D'\end{verbatim} \end{quote} Iterators (and generators) are new features of Python 2.2 and could not be familiar to all readers. However, since they are unrelated to OOP, they are outside the scope of this book and will not be discussed here in detail. Nevertheless, I will give a typical example of use of a generator, since this construct will be used in future chapters. 
At the syntactical level, a generator is a ``function'' with (at least one) \texttt{yield} statement (notice that in Python 2.2 the \texttt{yield} statement is enabled through the \texttt{from {\_}{\_}future{\_}{\_} import generators} syntax): \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{import~re}\\ \mbox{}\\ \mbox{def~generateblocks(regexp,text):}\\ \mbox{~~~~"Generator~splitting~text~in~blocks~according~to~regexp"}\\ \mbox{~~~~start=0}\\ \mbox{~~~~for~MO~in~regexp.finditer(text):}\\ \mbox{~~~~~~~~beg,end=MO.span()}\\ \mbox{~~~~~~~~yield~text[start:beg]~{\#}~actual~text}\\ \mbox{~~~~~~~~yield~text[beg:end]~{\#}~separator}\\ \mbox{~~~~~~~~start=end}\\ \mbox{~~~~lastblock=text[start:]~}\\ \mbox{~~~~if~lastblock:~yield~lastblock;~yield~''}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} In order to understand this example, the reader may want to refresh his/her understanding of regular expressions; since this is not a subject for this book, I simply recall the meaning of \texttt{finditer}: \begin{quote} \begin{verbatim}>>> import re >>> help(re.finditer) finditer(pattern, string) Return an iterator over all non-overlapping matches in the string. For each match, the iterator returns a match object. Empty matches are included in the result.\end{verbatim} \end{quote} Generators can be thought of as resumable functions that stop at the \texttt{yield} statement and resume from the point where they left. \begin{quote} \begin{verbatim}>>> from oopp import generateblocks >>> text='Python_Rules!' >>> g=generateblocks(re.compile('_'),text) >>> g >>> dir(g) ['__class__', '__delattr__', '__doc__', '__getattribute__', '__hash__', '__init__', '__iter__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__str__', 'gi_frame', 'gi_running', 'next']\end{verbatim} \end{quote} Generator objects can be used as iterators in a \texttt{for} loop.
In this example the generator takes a text and a regular expression describing a fixed delimiter; then it splits the text in blocks according to the delimiter. For instance, if the delimiter is '{\_}', the text 'Python{\_}Rules!' is split into 'Python', '{\_}' and 'Rules!': \begin{quote} \begin{verbatim}>>> for n, block in enumerate(g): print n, block ... 0 Python 1 2 Rules! 3\end{verbatim} \end{quote} This example also shows the usage of the new Python 2.3 built-in \texttt{enumerate}. Under the hood the \texttt{for} loop is calling the generator via its \texttt{next} method, until the \texttt{StopIteration} exception is raised. For this reason a new call to the \texttt{for} loop will have no effect: \begin{quote} \begin{verbatim}>>> for n, block in enumerate(g): print n, block ...\end{verbatim} \end{quote} The point is that the generator has already yielded its last element: \begin{quote} \begin{verbatim}>>> g.next() # error Traceback (most recent call last): File "", line 1, in ? StopIteration\end{verbatim} \end{quote} \texttt{generateblocks} always returns an even number of blocks; odd blocks are delimiters whereas even blocks are the intervening text; there may be empty blocks, corresponding to the null string ''. Notice the difference with the 'str.split' method \begin{quote} \begin{verbatim}>>> 'Python_Rules!'.split('_') ['Python', 'Rules!']\end{verbatim} \end{quote} and the regular expression split method: \begin{quote} \begin{verbatim}>>> re.compile('_').split('Python_Rules!') ['Python', 'Rules!']\end{verbatim} \end{quote} both return lists with an odd number of elements and both miss the separator. The regular expression split method can catch the separator, if wanted, \begin{quote} \begin{verbatim}>>> re.compile('(_)').split('Python_Rules!') ['Python', '_', 'Rules!']\end{verbatim} \end{quote} but still is different from the generator, since it returns a list.
The difference is relevant if we want to split a very large text, since the generator avoids to build a very large list and thus it is much more memory efficient (it is faster, too). Moreover, \texttt{generateblocks} works differently in the case of multiple groups: \begin{quote} \begin{verbatim}>>> delim=re.compile('(_)|(!)') #delimiter is space or exclamation mark >>> for n, block in enumerate(generateblocks(delim,text)): ... print n, block 0 Python 1 _ 2 Rules 3 !\end{verbatim} \end{quote} whereas \begin{quote} \begin{verbatim}>>> delim.split(text) ['Python', '_', None, 'Rules', None, '!', '']\end{verbatim} \end{quote} gives various unwanted \texttt{None} (which could be skipped with \texttt{[x for x in delim.split(text) if x is not None]}); notice, that there are no differences (apart from the fact that \texttt{delim.split(text)} has an odd number of elements) when one uses a single group regular expression: \begin{quote} \begin{verbatim}>>> delim=re.compile('(_|!)') >>> delim.split(text) ['Python', '_', 'Rules', '!', '']\end{verbatim} \end{quote} The reader unfamiliar with iterators and generators is encouraged to look at the standard documentation and other references. For instance, there are Alex Martelli's notes on iterators at \href{http://www.strakt.com/dev_talks.html}{http://www.strakt.com/dev{\_}talks.html} and there is a good article on generators by David Mertz \href{http://www-106.ibm.com/developerworks/linux/library/l-pycon.html}{http://www-106.ibm.com/developerworks/linux/library/l-pycon.html} %___________________________________________________________________________ \hypertarget{the-convenience-of-functions}{} \pdfbookmark[0]{THE CONVENIENCE OF FUNCTIONS}{the-convenience-of-functions} \section*{THE CONVENIENCE OF FUNCTIONS} Functions are the most basic Python objects. They are also the simplest objects where one can apply the metaprogramming techniques that are the subject of this book. 
The tricks used in this chapter and the utility functions defined here will be used throughout the book. Therefore this is an \emph{essential} chapter. Since it is intended to be a gentle introduction, the tone will be informal. %___________________________________________________________________________ \hypertarget{id16}{} \pdfbookmark[1]{Introduction}{id16} \subsection*{Introduction} One could be surprised that a text on OOP begins with a chapter on the well known old-fashioned functions. In some sense, this is also against the spirit of an important trend in OOP, which tries to shift the focus from functions to data. In pure OOP languages, there are no more functions, only methods. [\hyperlink{id15}{8}] However, there are good reasons for that: \newcounter{listcnt11} \begin{list}{\arabic{listcnt11}.} { \usecounter{listcnt11} \setlength{\rightmargin}{\leftmargin} } \item {} In Python, functions \emph{are} objects. And particularly useful ones. \item {} Python functions are pretty powerful and all their secrets are probably \emph{not} well known to the average Python programmer. \item {} In the solutions of many problems, you don't need the full apparatus of OOP: good old functions can be enough. \end{list} Moreover, I am a believer in the multiparadigm approach to programming, in which you choose your tools according to your problem. With a bazooka you can kill a mosquito, yes, but this does not mean that you must use the bazooka \emph{always}. In certain languages, you have no choice, and you must define a class (involving a lot of boiler plate code) even for the most trivial application. Python's philosophy is to keep simple things simple, while retaining the capability of doing even difficult things with a reasonable amount of effort. The message of this chapter will be: ``use functions when you don't need classes''.
Functions are good because: \newcounter{listcnt12} \begin{list}{\arabic{listcnt12}.} { \usecounter{listcnt12} \setlength{\rightmargin}{\leftmargin} } \item {} They are easy to write (no boiler plate); \item {} They are easy to understand; \item {} They can be reused in your code; \item {} Functions are an essential building block in the construction of objects. \end{list} Even if I think that OOP is an extremely effective strategy, with enormous advantages on design, maintainability and reusability of code, nevertheless this book is \emph{not} intended to be a panegyric of OOP. There are cases in which you don't need OOP. I think the critical parameter is the size of the program. These are the rules I usually follow (to be taken as indicative): \newcounter{listcnt13} \begin{list}{\arabic{listcnt13}.} { \usecounter{listcnt13} \setlength{\rightmargin}{\leftmargin} } \item {} If I have to write a short script of 20-30 lines, that copies two or three files and prints some message, I use fast and dirty spaghetti-code; there is no use for OOP. \item {} If the script grows to one hundred lines or more, I structure it into a few routines and a main program: but still I can live without OOP. \item {} If the script goes beyond the two hundred lines, I start collecting my routines in a few classes. \item {} If the script goes beyond the five hundred lines, I split the program in various files and modules and convert it to a package. \item {} I never write a function longer than 50 lines, since 50 lines is more or less the size of a page in my editor, and I need to be able to see the entire function in a page. \end{list} Of course your taste could be different and you could prefer to write a monolithic program of five thousand lines; however the average size of the modules in the Python standard library is 111 lines. I think this is a \emph{strong} suggestion towards a modular style of programming, which is \emph{very} well supported in Python.
The point is that OOP is especially useful for \emph{large} programs: if you only use Python for short system administration scripts you may well live without OOP. Unfortunately, as everybody knows, short scripts have an evil tendency to become medium size scripts, and medium size scripts have the even more evil tendency to become large scripts and possibly even full featured applications ! For this reason it is very probable that at a certain moment you will feel the need for OOP. I remember my first big program, a long time ago: I wrote a program to draw mathematical functions in AmigaBasic. It was good and nice as long as it had a size of a few hundred lines; but when it passed a thousand lines, it became rapidly unmanageable and unmaintainable. There were four problems: \newcounter{listcnt14} \begin{list}{\arabic{listcnt14}.} { \usecounter{listcnt14} \setlength{\rightmargin}{\leftmargin} } \item {} I could not split the program in modules, as I wanted, due to the limitations of AmigaBasic; \item {} I was missing OOP to keep the logic of the program all together, but at the time I didn't know that; \item {} I was missing effective debugging techniques; \item {} I was missing effective refactoring tools. \end{list} I am sure anybody who has ever written a large program has run into these limitations: and the biggest help of OOP is in overcoming these limitations. Obviously, miracles are impossible, and even object oriented programs can grow to a size where they become unmaintainable: the point is that the critical limit is much higher than the thousand lines of structured programs. I haven't yet reached the limit of unmanageability with Python.
The fact that the standard library is 66492 lines long (as results from the total number of lines in \texttt{/usr/local/lib/python2.2/}), but it is still manageable, gives me hope ;-) \begin{quote} \begin{figure}[b]\hypertarget{id18}[9] However, one could argue that having functions distinguished from methods is the best thing to do, even in a strongly object-oriented world. For instance, generic functions can be used to implement multimethods. See for instance Lisp, Dylan and MultiJava. This latter is forced to introduce the concept of function outside a class, foreign to traditional Java, just to implement multimethods. \end{figure} \end{quote} %___________________________________________________________________________ \hypertarget{a-few-useful-functions}{} \pdfbookmark[1]{A few useful functions}{a-few-useful-functions} \subsection*{A few useful functions} It is always a good idea to have a set of useful functions collected in a user defined module. The first function we want to have in our module is the \texttt{do{\_}nothing} function: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~do{\_}nothing(*args,**kw):~pass}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} This function accepts a variable number of arguments and keywords (I refer the reader to the standard documentation if she is unfamiliar with these concepts; this is \emph{not} another Python tutorial ;-) and returns \texttt{None}. It is very useful for debugging purposes, when in a complex program you may want to concentrate your attention on a few crucial functions and set the non-relevant functions to \texttt{do{\_}nothing} functions. A second function which is useful in developing programs is a timer function.
Very often indeed, we may want to determine the bottleneck parts of a program: we are interested in profiling them and in seeing if we can improve the speed by improving the algorithm, or by using a Python ``compiler'' such as Psyco, or if we really need to write a C extension. In my experience, I never needed to write a C extension, since Python is fast enough. Nevertheless, to profile a program is always a good idea and Python provides a profiler module in the standard library with this aim. Still, it is convenient to have a set of user defined functions to test the execution speed of a few selected routines (whereas the standard profiler profiles everything). We see from the standard library documentation that the current time can be retrieved from the \texttt{time} module: [\hyperlink{id18}{9}] \begin{quote} \begin{verbatim}>>> import time >>> time.asctime() 'Wed Jan 15 12:46:03 2003'\end{verbatim} \end{quote} Since we are not interested in the date but only in the time, we need a function to extract it. This is easily implemented: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{import~time}\\ \mbox{}\\ \mbox{def~get{\_}time():}\\ \mbox{~~~~"Return~the~time~of~the~system~in~the~format~HH:MM:SS"}\\ \mbox{~~~~return~time.asctime().split()[3]}\\ \mbox{}\\ \mbox{{\#}}\\ \mbox{}\\ \mbox{>>>~from~oopp~import~get{\_}time}\\ \mbox{>>>~get{\_}time()}\\ \mbox{'13:03:49'} \end{flushleft}\end{ttfamily} \end{quote} Suppose, for instance, we want to know how long it takes Python to write a Gigabyte of data. This can be a quite useful benchmark to have an idea of the I/O bottlenecks in our system. Since keeping a file of a Gigabyte in memory can be quite problematic, let me compute the time spent in writing 1024 files of one Megabyte each.
To this aim we need a \texttt{writefile} function \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~writefile(fname,data):}\\ \mbox{~~~~f=file(fname,'w')}\\ \mbox{~~~~f.write(data)}\\ \mbox{~~~~f.close()}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} and timing function. The idea is to wrap the \texttt{writefile} function in a \texttt{with{\_}clock} function as follows: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~with{\_}clock(func,n=1):}\\ \mbox{~~~~def~{\_}(*args,**kw):~{\#}~this~is~a~closure}\\ \mbox{~~~~~~~~print~"Process~started~on",get{\_}time()}\\ \mbox{~~~~~~~~print~'~..~please~wait~..'}\\ \mbox{~~~~~~~~for~i~in~range(n):~func(*args,**kw)}\\ \mbox{~~~~~~~~print~"Process~ended~on",get{\_}time()}\\ \mbox{~~~~return~{\_}}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} The wrapper function \texttt{with{\_}clock} has converted the function \texttt{writefile} in a function \texttt{with{\_}clock(writefile)} which has the same arguments of \texttt{writefile}, but contains additional features: in this case timing capabilities. Technically speaking, the internal function \texttt{{\_}} is called a \emph{closure}. Closures are very common in functional languages and can be used in Python too, with very little effort [\hyperlink{id21}{10}]. I will use closures very often in the following, and I will use the convention of denoting with ``{\_}'' the inner function in the closure, since there is no reason of giving to it a descriptive name (the name 'with{\_}clock' in the outer function is descriptive enough). For the same, reason I do not use a docstring for ``{\_}''. If Python would allow multistatement lambda functions, ``{\_}'' would be a good candidate for an anonymous function. Here is an example of usage: \begin{quote} \begin{verbatim}>>> from oopp import * >>> data='*'*1024*1024 #one megabyte >>> with_clock(writefile,n=1024)('datafile',data) #. 
Process started on 21:20:01 .. please wait .. Process ended on 21:20:57\end{verbatim} \end{quote} This example shows that Python has written one Gigabyte of data (split in 1024 chunks of one Megabyte each) in less than a minute. However, the result depends very much on the filesystem. I always suggest people to profile their programs, since one \emph{always} finds surprises. For instance, I have checked the performance of my laptop, a dual machine Windows 98 SE/ Red Hat Linux 7.3. The results are collected in the following table: \begin{quote} \begin{longtable}[c]{|p{0.21\locallinewidth}|p{0.26\locallinewidth}|p{0.30\locallinewidth}|} \hline \textbf{ Linux ext-3 } & \textbf{ FAT under Linux } & \textbf{ FAT under Windows 98 } \\ \hline \endhead %[visit_tbody] 24-25 s & 56-58 s & 86-88 s \\ \hline %[depart_tbody] \end{longtable} \end{quote} We see that Linux is \emph{much} faster: more than three times faster than Windows, using the same machine! Notice that the FAT filesystem under Linux (where it is \emph{not} native) is remarkably faster than the FAT under Windows 98, where it is native !! I think that now my readers can begin to understand why this book has been written under Linux and why I \emph{never} use Windows for programming (actually I use it only to see the DVD's ;-). I leave as an exercise for the reader to check the results of this script on their machine. Since my laptop is quite old, you will probably have much better performance (for instance on my linux desktop I can write a Gigabyte in less than 12 seconds!). However, there are \emph{always} surprises: my desktop is a dual Windows 2000 machine with three different filesystems, Linux ext-2, FAT and NTFS. Surprisingly enough, the NT filesystem is the most inefficient for writing, \emph{ten times slower} than Linux!
\begin{quote} \begin{longtable}[c]{|p{0.21\locallinewidth}|p{0.26\locallinewidth}|p{0.30\locallinewidth}|} \hline \textbf{ Linux ext-2 } & \textbf{ FAT under Win2000 } & \textbf{ NTFS under Win2000 } \\ \hline \endhead %[visit_tbody] 11-12 s & 95-97 s & 117-120 s \\ \hline %[depart_tbody] \end{longtable} \end{quote} \begin{figure}[b]\hypertarget{id21}[10] Users of Python 2.3 can take a look at the new \texttt{datetime} module, if they are looking for a sophisticated clock/calendar. \end{figure} \begin{figure}[b]\hypertarget{id22}[11] There are good references on functional programming in Python; I suggest the Python Cookbook and the articles by David Mertz www.IBM.dW. \end{figure} %___________________________________________________________________________ \hypertarget{functions-are-objects}{} \pdfbookmark[1]{Functions are objects}{functions-are-objects} \subsection*{Functions are objects} As we said in the first chapter, objects have attributes accessible with the dot notation. This is not surprising at all. However, it could be surprising to realize that since Python functions are objects, they can have attributes, too. This could be surprising since this feature is quite uncommon: typically either i) the language is not object-oriented, and therefore functions are not objects, or ii) the language is strongly object-oriented and does not have functions, only methods. Python is a multiparadigm language (which I prefer to the term ``hybrid'' language), therefore it has functions that are objects, as in Lisp and other functional languages. Consider for instance the \texttt{get{\_}time} function.
That function has at least one useful attribute, its docstring: \begin{quote} \begin{verbatim}>>> from oopp import get_time >>> print get_time.func_doc Return the time of the system in the format HH:MM:SS\end{verbatim} \end{quote} The docstring can also be obtained with the \texttt{help} function: \begin{quote} \begin{verbatim}>>> help(get_time) Help on function get_time in module oopp: get_time() Return the time of the system in the format HH:MM:SS\end{verbatim} \end{quote} Therefore \texttt{help} works on user-defined functions, too, not only on built-in functions. Notice that \texttt{help} also returns the argument list of the function. For instance, this is the help message on the \texttt{round} function that we will use in the following: \begin{quote} \begin{verbatim}>>> help(round) Help on built-in function round: round(...) round(number[, ndigits]) -> floating point number Round a number to a given precision in decimal digits (default 0 digits).This always returns a floating point number. Precision may be negative.\end{verbatim} \end{quote} I strongly recommend Python programmers to use docstrings, not only for clarity's sake during the development, but especially because it is possible to automatically generate nice HTML documentation from the docstrings, by using the standard tool ``pydoc''. One can easily add attributes to a function. For instance: \begin{quote} \begin{verbatim}>>> get_time.more_doc='get_time invokes the function time.asctime' >>> print get_time.more_doc get_time invokes the function time.asctime\end{verbatim} \end{quote} Attributes can be functions, too: \begin{quote} \begin{verbatim}>>> def IamAfunction(): print "I am a function attached to a function" >>> get_time.f=IamAfunction >>> get_time.f() I am a function attached to a function\end{verbatim} \end{quote} This is a quite impressive potentiality of Python functions, which has no direct equivalent in most other languages. One possible application is to fake C ``static'' variables.
Suppose for instance we need a function remembering how may times it is called: we can simply use \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~double(x):}\\ \mbox{~~~~try:~{\#}look~if~double.counter~is~defined}\\ \mbox{~~~~~~~~double.counter}\\ \mbox{~~~~except~AttributeError:}\\ \mbox{~~~~~~~~double.counter=0~{\#}first~call}\\ \mbox{~~~~double.counter+=1}\\ \mbox{~~~~return~2*x}\\ \mbox{}\\ \mbox{double(double(2))}\\ \mbox{print~"double~has~been~called~{\%}s~times"~{\%}~double.counter}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} with output \texttt{double has been called 2 times}. A more elegant approach involves closures. A closure can enhance an ordinary function, providing to it the capability of remembering the results of its previous calls and avoiding the duplication of computations: \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~withmemory(f):}\\ \mbox{~~~~"""This~closure~invokes~the~callable~object~f~only~if~need~there~is"""}\\ \mbox{~~~~argskw=[];~result=[]}\\ \mbox{~~~~def~{\_}(*args,**kw):~}\\ \mbox{~~~~~~~~akw=args,kw}\\ \mbox{~~~~~~~~try:~{\#}~returns~a~previously~stored~result}\\ \mbox{~~~~~~~~~~~~i=argskw.index(akw)}\\ \mbox{~~~~~~~~except~ValueError:~{\#}~there~is~no~previously~stored~result}\\ \mbox{~~~~~~~~~~~~res=f(*args,**kw)~~{\#}~returns~the~new~result}\\ \mbox{~~~~~~~~~~~~argskw.append(akw)~{\#}~update~argskw}\\ \mbox{~~~~~~~~~~~~result.append(res)~{\#}~update~result}\\ \mbox{~~~~~~~~~~~~return~res}\\ \mbox{~~~~~~~~else:}\\ \mbox{~~~~~~~~~~~~return~result[i]~~}\\ \mbox{~~~~{\_}.argskw=argskw~{\#}makes~the~argskw~list~accessible~outside}\\ \mbox{~~~~{\_}.result=result~{\#}makes~the~result~list~accessible~outside}\\ \mbox{~~~~return~{\_}}\\ \mbox{}\\ \mbox{def~memoize(f):}\\ \mbox{~~~~"""This~closure~remembers~all~f~invocations"""}\\ \mbox{~~~~argskw,result~=~[],[]}\\ \mbox{~~~~def~{\_}(*args,**kw):~}\\ \mbox{~~~~~~~~akw=args,kw}\\ 
\mbox{~~~~~~~~try:~{\#}~returns~a~previously~stored~result}\\ \mbox{~~~~~~~~~~~~return~result[argskw.index(akw)]}\\ \mbox{~~~~~~~~except~ValueError:~{\#}~there~is~no~previously~stored~result}\\ \mbox{~~~~~~~~~~~~argskw.append(akw)~{\#}~update~argskw}\\ \mbox{~~~~~~~~~~~~result.append(f(*args,**kw))~{\#}~update~result}\\ \mbox{~~~~~~~~~~~~return~result[-1]~{\#}~return~the~new~result}\\ \mbox{~~~~{\_}.argskw=argskw~{\#}makes~the~argskw~list~accessible~outside}\\ \mbox{~~~~{\_}.result=result~{\#}makes~the~result~list~accessible~outside}\\ \mbox{~~~~return~{\_}}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} Now, if we call the wrapped function \texttt{f} twice with the same arguments, Python can give the result without repeating the (possibly very long) computation. \begin{quote} \begin{verbatim}>>> def f(x): ... print 'called f' ... return x*x >>> wrapped_f=withmemory(f) >>> wrapped_f(2) #first call with the argument 2; executes the computation called f 4 >>> wrapped_f(2) #does not repeat the computation 4 >>> wrapped_f.result [4] >>> wrapped_f.argskw [((2,), {})]\end{verbatim} \end{quote} %___________________________________________________________________________ \hypertarget{profiling-functions}{} \pdfbookmark[1]{Profiling functions}{profiling-functions} \subsection*{Profiling functions} The \texttt{with{\_}clock} function provided before was intended to be pedagogical; as such it is a quite poor solution to the problem of profiling a Python routine. A better solution involves using two others functions in the time library, \texttt{time.time()} that gives that time in seconds elapsed from a given date, and \texttt{time.clock()} that gives the time spent by the CPU in a given computation. Notice that \texttt{time.clock()} has not an infinite precision (the precision depends on the system) and one should expect relatively big errors if the function runs in a very short time. 
That's the reason why it is convenient to execute multiple times short functions and divide the total time by the number of repetitions. Moreover, one should subtract the overhead do to the looping. This can be computed with the following routine: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~loop{\_}overhead(N):}\\ \mbox{~~~~"Computes~the~time~spent~in~empty~loop~of~N~iterations"}\\ \mbox{~~~~t0=time.clock()}\\ \mbox{~~~~for~i~in~xrange(N):~pass}\\ \mbox{~~~~return~time.clock()-t0}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} For instance, on my laptop an empty loop of one million of iterations is performed in 1.3 seconds. Typically the loop overhead is negligible, whereas the real problem is the function overhead. Using the attribute trick discussed above, we may define a \texttt{with{\_}timer} function that enhances quite a bit \texttt{with{\_}clock}: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~with{\_}timer(func,~modulename='{\_}{\_}main{\_}{\_}',~n=1,~logfile=sys.stdout):}\\ \mbox{~~~~"""Wraps~the~function~func~and~executes~it~n~times~(default~n=1).~}\\ \mbox{~~~~The~average~time~spent~in~one~iteration,~express~in~milliseconds,~}\\ \mbox{~~~~is~stored~in~the~attributes~func.time~and~func.CPUtime,~and~saved~}\\ \mbox{~~~~in~a~log~file~which~defaults~to~the~standard~output.}\\ \mbox{~~~~"""}\\ \mbox{~~~~def~{\_}(*args,**kw):~{\#}~anonymous~function}\\ \mbox{~~~~~~~~time1=time.time()}\\ \mbox{~~~~~~~~CPUtime1=time.clock()}\\ \mbox{~~~~~~~~print~'Executing~{\%}s.{\%}s~...'~{\%}~(modulename,func.{\_}{\_}name{\_}{\_}),}\\ \mbox{~~~~~~~~for~i~in~xrange(n):~res=func(*args,**kw)~{\#}~executes~func~n~times}\\ \mbox{~~~~~~~~time2=time.time()}\\ \mbox{~~~~~~~~CPUtime2=time.clock()}\\ \mbox{~~~~~~~~func.time=1000*(time2-time1)/n}\\ \mbox{~~~~~~~~func.CPUtime=1000*(CPUtime2-CPUtime1-loop{\_}overhead(n))/n}\\ \mbox{~~~~~~~~if~func.CPUtime<10:~r=3~{\#}better~rounding}\\ 
\mbox{~~~~~~~~else:~r=1~{\#}default~rounding}\\ \mbox{~~~~~~~~print~>>~logfile,~'Real~time:~{\%}s~ms'~{\%}~round(func.time,r),}\\ \mbox{~~~~~~~~print~>>~logfile,~'~CPU~time:~{\%}s~ms'~{\%}~round(func.CPUtime,r)}\\ \mbox{~~~~~~~~return~res}\\ \mbox{~~~~return~{\_}}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Here it is an example of application: \begin{quote} \begin{verbatim}>>> from oopp import with_timer,writefile >>> data='*'*1024*1024 #one megabyte >>> with_timer(writefile,n=1024)('datafile',data) #. Executing writefile ... Real time: 60.0 ms CPU time: 42.2 ms\end{verbatim} \end{quote} The CPU time can be quite different from the real time, as you can see in the following example: \begin{quote} \begin{verbatim}>>> import time >>> def sleep(): time.sleep(1) ... >>> with_timer(sleep)() #. Executing sleep ... Real time: 999.7 ms CPU time: 0.0 ms\end{verbatim} \end{quote} We see that Python has run for 999.7 ms (i.e. 1 second, up to approximation errors in the system clock) during which the CPU has worked for 0.0 ms (i.e. the CPU took a rest ;-). The CPU time is the relevant time to use with the purpose of benchmarking Python speed. I should notice that the approach pursued in \texttt{with{\_}timer} is still quite simple. A better approach would be to plot the time versus the number of iteration, do a linear interpolation and extract the typical time for iteration from that. This allows to check visually that the machine is not doing something strange during the execution time and it is what I do in my personal benchmark routine; doing something similar is left as an exercise for the reader ;-). 
Another approach is to use the \texttt{timeit.py} module (new in Python 2.3, but works also with Python 2.2): \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{import~timeit,{\_}{\_}main{\_}{\_},warnings}\\ \mbox{}\\ \mbox{warnings.filterwarnings('ignore',}\\ \mbox{'import~{\textbackslash}*~only~allowed~at~module~level',SyntaxWarning)}\\ \mbox{}\\ \mbox{def~timeit{\_}(stmt,setup='from~{\_}{\_}main{\_}{\_}~import~*',n=1000):}\\ \mbox{~~~~t=timeit.Timer(stmt,setup)}\\ \mbox{~~~~try:~print~t.repeat(number=n)~{\#}~class~timeit~3~times}\\ \mbox{~~~~except:~t.print{\_}exc()}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} It is often stated that Python is slow and quite ineffective in application involving hard computations. This is generally speaking true, but how bad is the situation ? To test the (in)efficiency of Python on number crunching, let me give a function to compute the Mandelbrot set, which I have found in the Python Frequently Asked Question (FAQ 4.15. \emph{Is it possible to write obfuscated one-liners in Python?}). 
This function is due to Ulf Bartelt and you should ask him to know how does it work ;-) \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~mandelbrot(row,col):}\\ \mbox{~~~~"Computes~the~Mandelbrot~set~in~one~line"}\\ \mbox{~~~~return~(lambda~Ru,Ro,Iu,Io,IM,Sx,Sy:reduce(}\\ \mbox{~~~~~~~~lambda~x,y:x+y,map(lambda~y,Iu=Iu,Io=Io,Ru=Ru,Ro=Ro,Sy=Sy,L=}\\ \mbox{~~~~~~~~lambda~yc,Iu=Iu,Io=Io,Ru=Ru,Ro=Ro,i=IM,~Sx=Sx,Sy=Sy:reduce(}\\ \mbox{~~~~~~~~lambda~x,y:x+y,map(lambda~x,xc=Ru,yc=yc,Ru=Ru,Ro=Ro,~i=i,}\\ \mbox{~~~~~~~~Sx=Sx,F=lambda~xc,yc,x,y,k,f=lambda~xc,yc,x,y,k,f:(k<=0)}\\ \mbox{~~~~~~~~or~(x*x+y*y>=4.0)~or~1+f(xc,yc,x*x-y*y+xc,2.0*x*y+yc,k-1,f):}\\ \mbox{~~~~~~~~f(xc,yc,x,y,k,f):chr(64+F(Ru+x*(Ro-Ru)/Sx,yc,0,0,i)),}\\ \mbox{~~~~~~~~range(Sx))):L(Iu+y*(Io-Iu)/Sy),range(Sy))))(}\\ \mbox{~~~~~~~~-2.1,~0.7,~-1.2,~1.2,~30,~col,~row)}\\ \mbox{~~~~{\#}~~~~{\textbackslash}{\_}{\_}{\_}~{\_}{\_}{\_}/~~{\textbackslash}{\_}{\_}{\_}~{\_}{\_}{\_}/~~|~~~|~~~~|{\_}~lines~on~screen}\\ \mbox{~~~~{\#}~~~~~~~~V~~~~~~~~~~V~~~~~~|~~~|{\_}{\_}{\_}{\_}{\_}{\_}~columns~on~screen}\\ \mbox{~~~~{\#}~~~~~~~~|~~~~~~~~~~|~~~~~~|{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}~maximum~of~"iterations"}\\ \mbox{~~~~{\#}~~~~~~~~|~~~~~~~~~~|{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}~range~on~y~axis}\\ \mbox{~~~~{\#}~~~~~~~~|{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}~range~on~x~axis}\\ \mbox{~~~~}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Here there is the benchmark on my laptop: \begin{quote} \begin{verbatim}>>> from oopp import mandelbrot,with_timer >>> row,col=24,75 >>> output=with_timer(mandelbrot,n=1)(row,col) Executing __main__.mandelbrot ... Real time: 427.9 ms CPU time: 410.0 ms >>> for r in range(row): print output[r*col:(r+1)*col] ... 
BBBBBBBBBBBBBBCCCCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDCCCCCCCCCCCCCC BBBBBBBBBBBBCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDEEEEEEFGYLFFFEEEEEDDDDDCCCCCCCCC BBBBBBBBBBCCCCCCCDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEFFFGIKNJLLGEEEEEEDDDDDDCCCCC BBBBBBBBBCCCCCDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEFFFFGHJJR^QLIHGFFEEEEEEDDDDDDCC BBBBBBBBCCCDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEFFFGGGHIK_______LHGFFFFFEEEEDDDDDD BBBBBBBCCDDDDDDDDDDDDDDDDDDDDEEEEEEEFFFGHILIIIJJKMS_____PLJJIHGGGHJFEEDDDDD BBBBBBCDDDDDDDDDDDDDDDDDDEEEEEFFFFFFGGGHMQ__T________________QLOUP[OGFEDDDD BBBBBCDDDDDDDDDDDDDDDEEEFFFFFFFFFGGGGHJNM________________________XLHGFFEEDD BBBBCDDDDDDDDDEEEEEFFGJKHHHHHHHHHHHHIKN[__________________________MJKGFEEDD BBBBDDDDEEEEEEEEFFFFGHIKPVPMNU_QMJJKKZ_____________________________PIGFEEED BBBCDEEEEEEEEFFFFFFHHHML___________PQ_______________________________TGFEEEE BBBDEEEEEEFGGGGHHHJPNQP^___________________________________________IGFFEEEE BBB_____________________________________________________________OKIHGFFEEEE BBBDEEEEEEFGGGGHHHJPNQP^___________________________________________IGFFEEEE BBBCDEEEEEEEEFFFFFFHHHML___________PQ_______________________________TGFEEEE BBBBDDDDEEEEEEEEFFFFGHIKPVPMNU_QMJJKKZ_____________________________PIGFEEED BBBBCDDDDDDDDDEEEEEFFGJKHHHHHHHHHHHHIKN[__________________________MJKGFEEDD BBBBBCDDDDDDDDDDDDDDDEEEFFFFFFFFFGGGGHJNM________________________XLHGFFEEDD BBBBBBCDDDDDDDDDDDDDDDDDDEEEEEFFFFFFGGGHMQ__T________________QLOUP[OGFEDDDD BBBBBBBCCDDDDDDDDDDDDDDDDDDDDEEEEEEEFFFGHILIIIJJKMS_____PLJJIHGGGHJFEEDDDDD BBBBBBBBCCCDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEFFFGGGHIK_______LHGFFFFFEEEEDDDDDD BBBBBBBBBCCCCCDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEFFFFGHJJR^QLIHGFFEEEEEEDDDDDDCC BBBBBBBBBBCCCCCCCDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEFFFGIKNJLLGEEEEEEDDDDDDCCCCC BBBBBBBBBBBBCCCCCCCCCDDDDDDDDDDDDDDDDDDDDDDEEEEEEFGYLFFFEEEEEDDDDDCCCCCCCCC\end{verbatim} \end{quote} I am willing to concede that this code is not typical Python code and actually it could be an example of \emph{bad} code, but I wanted a nice ASCII 
picture in my book ... :) Also, this proves that Python is not necessarily readable and easy to understand ;-) I leave it to the courageous reader to convert the previous algorithm to C and measure the difference in speed ;-) %___________________________________________________________________________ \hypertarget{about-python-speed}{} \pdfbookmark[1]{About Python speed}{about-python-speed} \subsection*{About Python speed} The best way to improve the speed is to improve the algorithm; in this sense Python is an ideal language since it allows you to test many algorithms in an incredibly short time: in other words, the time you would spend fighting with the compiler in other languages can be used in Python to improve the algorithm. However, in some cases there is little to do: for instance, in many problems one has to run lots of loops, and Python loops are horribly inefficient compared to C loops. In this case the simplest possibility is to use Psyco. Psyco is a specializing Python compiler written by Armin Rigo. It works for 386 based processors and allows Python to run loops at C speed. Installing Psyco requires {\$}0.00 and ten minutes of your time: nine minutes to find the program, download it, and install it; one minute to understand how to use it.
The following script explains both the usage and the advantages of Psyco: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{import~oopp,sys}\\ \mbox{try:~}\\ \mbox{~~~~import~psyco}\\ \mbox{except~ImportError:~}\\ \mbox{~~~~print~"Psyco~is~not~installed,~sorry."}\\ \mbox{else:}\\ \mbox{~~~~n=1000000~{\#}~1,000,000~loops}\\ \mbox{}\\ \mbox{~~~~without=oopp.loop{\_}overhead(n)~}\\ \mbox{~~~~print~"Without~Psyco:",without}\\ \mbox{}\\ \mbox{~~~~psyco.bind(oopp.loop{\_}overhead)~{\#}compile~the~empty{\_}loop}\\ \mbox{}\\ \mbox{~~~~with=oopp.loop{\_}overhead(n)~}\\ \mbox{~~~~print~"With~Psyco:",with}\\ \mbox{}\\ \mbox{~~~~print~'Speedup~=~{\%}sx'~{\%}~round(without/with,1)}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} The output is impressive: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{Without~Psyco:~1.3}\\ \mbox{With~Psyco:~0.02}\\ \mbox{Speedup~=~65.0x} \end{flushleft}\end{ttfamily} \end{quote} Notice that, on repeating the test, you will obtain different speedups. On my laptop, the speedup for an empty loop of 10,000,000 iterations is of the order of 70x, which is actually the same speed as a C loop (I checked it). On my desktop, I have even found a speedup of 94x! However, I must say that Psyco has some limitations. The problem is the function call overhead. Psyco increases the overhead and in some programs it can even \emph{worsen} the performance (this is why you should \emph{never} use the \texttt{psyco.jit()} function that wraps all the functions of your program: you should only wrap the bottleneck loops). Generally speaking, you should expect a much more modest improvement: a factor of 2 or 3 is what I usually obtain in my programs.
Look at this second example, which essentially measure the function call overhead by invoking the \texttt{do{\_}nothing} function: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{import~oopp}\\ \mbox{try:~}\\ \mbox{~~~~import~psyco}\\ \mbox{except~ImportError:}\\ \mbox{~~~~print~"Psyco~is~not~installed,~sorry."}\\ \mbox{else:}\\ \mbox{~~~~n=10000~{\#}~10,000~loops}\\ \mbox{~}\\ \mbox{~~~~def~do{\_}nothing{\_}loop():}\\ \mbox{~~~~~~~~for~i~in~xrange(n):~oopp.do{\_}nothing()}\\ \mbox{}\\ \mbox{~~~~print~"Without~Psyco:{\textbackslash}n"}\\ \mbox{~~~~oopp.with{\_}timer(do{\_}nothing{\_}loop,n=5)()~{\#}50,000~times}\\ \mbox{}\\ \mbox{~~~~without=do{\_}nothing{\_}loop.CPUtime}\\ \mbox{}\\ \mbox{~~~~psyco.bind(do{\_}nothing{\_}loop)~}\\ \mbox{~~~~print~"With~Psyco:{\textbackslash}n"}\\ \mbox{~~~~oopp.with{\_}timer(do{\_}nothing{\_}loop,n=5)()~{\#}50,000~times}\\ \mbox{}\\ \mbox{~~~~with=do{\_}nothing{\_}loop.CPUtime}\\ \mbox{}\\ \mbox{~~~~print~'Speedup~=~{\%}sx'~{\%}~round(without/with,1)}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} The output is less incredible: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{Without~Psyco:}\\ \mbox{Executing~do{\_}nothing{\_}loop~...~Real~time:~138.2~ms~~CPU~time:~130.0~ms}\\ \mbox{With~Psyco:}\\ \mbox{Executing~do{\_}nothing{\_}loop~...~Real~time:~70.0~ms~~CPU~time:~68.0~ms}\\ \mbox{Speedup~=~1.9x} \end{flushleft}\end{ttfamily} \end{quote} However, this is still impressive, if you think that you can double the speed of your program by adding \emph{a line} of code! Moreover this example is not fair since Psyco cannot improve very much the performance for loops invoking functions with a variable number of arguments. On the other hand, it can do quite a lot for loops invoking functions with a fixed number of arguments. I have checked that you can easily reach speedups of 20x (!). 
The only disadvantage is that a program invoking Psyco takes much more memory than a normal Python program, but this is not a problem for most applications on today's computers. Therefore, Psyco can often save you the effort of going through a C extension. In some cases, however, there is no hope: I leave it as an exercise for the reader to check that Psyco (at least the version 0.4.1 I am using now) is unable to improve the performance on the Mandelbrot set example. This proves that in the case of bad code, there is no point in using a compiler: you have to improve the algorithm first! By the way, if you really want to go through a C extension with a minimal departure from Python, you can use Pyrex by Greg Ewing. A Pyrex program is essentially a Python program with variable declarations that is automatically converted to C code. Alternatively, you can inline C functions in Python with \texttt{weave} of ... Finally, if you want to access C/C++ libraries, there are tools like SWIG, Boost.Python and others. %___________________________________________________________________________ \hypertarget{tracing-functions}{} \pdfbookmark[1]{Tracing functions}{tracing-functions} \subsection*{Tracing functions} Typically, a script contains many functions that call each other when some conditions are satisfied. Also, typically during debugging things do not work the way we would like and it is not clear which functions are called, in which order they are called, and which parameters are passed. The best way to obtain all this information is to trace the functions in our script, and to write all the relevant information in a log file. In order to keep the distinction between the traced functions and the original ones, it is convenient to collect all the wrapped functions in a separate dictionary.
The tracing of a single function can be done with a closure like this: \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~with{\_}tracer(function,namespace='{\_}{\_}main{\_}{\_}',output=sys.stdout,~indent=[0]):}\\ \mbox{~~~~"""Closure~returning~traced~functions.~It~is~typically~invoked}\\ \mbox{~~~~trough~an~auxiliary~function~fixing~the~parameters~of~with{\_}tracer."""}\\ \mbox{~~~~def~{\_}(*args,**kw):}\\ \mbox{~~~~~~~~name=function.{\_}{\_}name{\_}{\_}}\\ \mbox{~~~~~~~~i='~'*indent[0];~indent[0]+=4~{\#}~increases~indentation}\\ \mbox{~~~~~~~~output.write("{\%}s[{\%}s]~Calling~'{\%}s'~with~arguments{\textbackslash}n"~{\%}~}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~(i,namespace,name))}\\ \mbox{~~~~~~~~output.write("{\%}s~{\%}s~...{\textbackslash}n"~{\%}~(i,str(args)+str(kw)))}\\ \mbox{~~~~~~~~res=function(*args,**kw)}\\ \mbox{~~~~~~~~output.write("{\%}s[{\%}s.{\%}s]~called~with~result:~{\%}s{\textbackslash}n"}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~{\%}~(i,namespace,name,str(res)))}\\ \mbox{~~~~~~~~indent[0]-=4~{\#}~restores~indentation}\\ \mbox{~~~~~~~~return~res}\\ \mbox{~~~~return~{\_}~{\#}~the~traced~function}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} Here is an example of usage: \begin{quote} \begin{verbatim}>>> from oopp import with_tracer >>> def fact(n): # factorial function ... if n==1: return 1 ... else: return n*fact(n-1) >>> fact=with_tracer(fact) >>> fact(3) [__main__] Calling 'fact' with arguments (3,){} ... [__main__] Calling 'fact' with arguments (2,){} ... [__main__] Calling 'fact' with arguments (1,){} ... [__main__.fact] called with result: 1 [__main__.fact] called with result: 2 [__main__.fact] called with result: 6 6\end{verbatim} \end{quote} The logic behind \texttt{with{\_}tracer} should be clear; the only trick is the usage of a default list as a way to store a global indentation parameter. 
Since \texttt{indent} is mutable, the value of \texttt{indent[0]} changes at any recursive call of the traced function, resulting in a nested display. Typically, one wants to trace all the functions in a given module; this can be done trough the following function: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{from~types~import~*}\\ \mbox{}\\ \mbox{isfunction=lambda~f:~isinstance(f,(FunctionType,BuiltinFunctionType))}\\ \mbox{}\\ \mbox{def~wrapfunctions(obj,wrapper,err=None,**options):}\\ \mbox{~~~~"Traces~the~callable~objects~in~an~object~with~a~dictionary"}\\ \mbox{~~~~namespace=options.get('namespace',getattr(obj,'{\_}{\_}name{\_}{\_}',''))}\\ \mbox{~~~~output=options.get('output',sys.stdout)}\\ \mbox{~~~~dic=dict([(k,wrapper(v,namespace,output))~}\\ \mbox{~~~~~~~~~~~~~~for~k,v~in~attributes(obj).items()~if~isfunction(v)])}\\ \mbox{~~~~customize(obj,err,**dic)}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Notice that 'wrapfunctions' accepts as first argument an object with a \texttt{{\_}{\_}dict{\_}{\_}} attribute (such as a module or a class) or with some explicit attributes (such as a simple object) and modifies it. One can trace a module as in this example: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{import~oopp,random}\\ \mbox{}\\ \mbox{oopp.wrapfunctions(random,oopp.with{\_}tracer)~}\\ \mbox{}\\ \mbox{random.random()}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} with output \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{[random]~Calling~'random'~with~arguments}\\ \mbox{(){\{}{\}}~...}\\ \mbox{->~'random.random'~called~with~result:~0.175450439202} \end{flushleft}\end{ttfamily} \end{quote} The beauty of the present approach is its generality: 'wrap' can be used to add any kind of capabilities to a pre-existing module. For instance, we could time the functions in a module, with the purpose of looking at the bottlenecks. 
To this aim, it is enough to use a 'timer' nested closure: An example of a call is \texttt{wrapfunctions(obj,timer,iterations=1)}. We may also compose our closures; for instance one could define a \texttt{with{\_}timer{\_}and{\_}tracer} closure: \begin{quote} \begin{verbatim}>>> with_timer_and_tracer=lambda f: with_timer(with_tracer(f))\end{verbatim} \end{quote} It should be noted that Python comes with a standard profiler (on my system it is located in \texttt{/usr/local/lib/python2.2/profile.py}) that allows one to profile a script or a module (try \texttt{python /usr/local/lib/python2.2/profile.py oopp.py}) or \begin{quote} \begin{verbatim}>>> import profile; help(profile)\end{verbatim} \end{quote} and see the on-line documentation. %___________________________________________________________________________ \hypertarget{tracing-objects}{} \pdfbookmark[1]{Tracing objects}{tracing-objects} \subsection*{Tracing objects} In this section, I will give a more sophisticated example, in which one can easily understand why Python's ability to change methods and attributes at run-time is so useful. As a preparation for the real example, let me first introduce a utility routine that allows the user to add tracing capabilities to a given object. Needless to say, this feature can be invaluable during debugging, or in trying to understand the behaviour of a program written by others. This routine is a little complex and needs some explanation. \newcounter{listcnt15} \begin{list}{\arabic{listcnt15}.} { \usecounter{listcnt15} \setlength{\rightmargin}{\leftmargin} } \item {} The routine looks in the attributes of the object and tries to access them.
\item {} If the access is possible, the routine looks for methods (methods are recognized through the \texttt{inspect.isroutine} function in the standard library) and ignores regular attributes; \item {} The routine tries to override the original methods with improved ones that possess tracing capabilities; \item {} the traced method is obtained with the wrapping trick discussed before. \end{list} I now give the real-life example that I have anticipated before. Improvements and elaborations of this example can be useful to the professional programmer, too. Suppose you have an XML text you want to parse. Python provides excellent support for this kind of operation and various standard modules. One of the most common is the \texttt{expat} module (see the standard library documentation for more). If you are just starting to use the module, it is certainly useful to have a way of tracing its behaviour; this is especially true if you find some unexpected error during the parsing of a document (and this may happen even if you are an experienced programmer ;-).
The tracing routine just defined can be used to trace the parser, as it is exemplified in the following short script: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{import~oopp,~xml.parsers.expat,~sys}\\ \mbox{}\\ \mbox{{\#}~text~to~be~parsed}\\ \mbox{text{\_}xml="""{\textbackslash}}\\ \mbox{}\\ \mbox{}\\ \mbox{Text~goes~here}\\ \mbox{"""}\\ \mbox{}\\ \mbox{{\#}~a~few~do~nothing~functions}\\ \mbox{def~start(*args):~pass}\\ \mbox{def~end(*args):~pass}\\ \mbox{def~handler(*args):~pass}\\ \mbox{}\\ \mbox{{\#}~a~parser~object}\\ \mbox{p~=~xml.parsers.expat.ParserCreate()}\\ \mbox{}\\ \mbox{p.StartElementHandler~=~start}\\ \mbox{p.EndElementHandler~=~end}\\ \mbox{p.CharacterDataHandler~=~handler}\\ \mbox{}\\ \mbox{{\#}adds~tracing~capabilities~to~p}\\ \mbox{oopp.wrapfunctions(p,oopp.with{\_}tracer,~err=sys.stdout)}\\ \mbox{}\\ \mbox{p.Parse(text{\_}xml)}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} The output is: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{Error:~SetBase~cannot~be~set}\\ \mbox{Error:~Parse~cannot~be~set}\\ \mbox{Error:~ParseFile~cannot~be~set}\\ \mbox{Error:~GetBase~cannot~be~set}\\ \mbox{Error:~SetParamEntityParsing~cannot~be~set}\\ \mbox{Error:~ExternalEntityParserCreate~cannot~be~set}\\ \mbox{Error:~GetInputContext~cannot~be~set}\\ \mbox{[]~Calling~'start'~with~arguments}\\ \mbox{~(u'parent',~{\{}u'id':~u'dad'{\}}){\{}{\}}~...}\\ \mbox{[.start]~called~with~result:~None}\\ \mbox{[]~Calling~'handler'~with~arguments}\\ \mbox{~(u'{\textbackslash}n',){\{}{\}}~...}\\ \mbox{[.handler]~called~with~result:~None}\\ \mbox{[]~Calling~'start'~with~arguments}\\ \mbox{~(u'child',~{\{}u'name':~u'kid'{\}}){\{}{\}}~...}\\ \mbox{[.start]~called~with~result:~None}\\ \mbox{[]~Calling~'handler'~with~arguments}\\ \mbox{~(u'Text~goes~here',){\{}{\}}~...}\\ \mbox{[.handler]~called~with~result:~None}\\ \mbox{[]~Calling~'end'~with~arguments}\\ \mbox{~(u'child',){\{}{\}}~...}\\ 
\mbox{[.end]~called~with~result:~None}\\ \mbox{[]~Calling~'handler'~with~arguments}\\ \mbox{~(u'{\textbackslash}n',){\{}{\}}~...}\\ \mbox{[.handler]~called~with~result:~None}\\ \mbox{[]~Calling~'end'~with~arguments}\\ \mbox{~(u'parent',){\{}{\}}~...}\\ \mbox{[.end]~called~with~result:~None} \end{flushleft}\end{ttfamily} \end{quote} This is a case where certain methods cannot be managed with \texttt{getattr/setattr}, because they are internally coded in C: this explains the error messages at the beginning. I leave it as an exercise for the reader to understand the rest ;-) %___________________________________________________________________________ \hypertarget{inspecting-functions}{} \pdfbookmark[1]{Inspecting functions}{inspecting-functions} \subsection*{Inspecting functions} Python's wonderful introspection features are really impressive when applied to functions. It is possible to extract a great deal of information from a Python function by looking at its associated \emph{code object}. For instance, let me consider my \texttt{do{\_}nothing} function: its associated code object can be extracted from the \texttt{func{\_}code} attribute: \begin{quote} \begin{verbatim}>>> from oopp import * >>> co=do_nothing.func_code # extracts the code object >>> co >>> type(co) \end{verbatim} \end{quote} The code object is far from being trivial; the docstring says it all: \begin{quote} \begin{verbatim}>>> print type(co).__doc__ code(argcount, nlocals, stacksize, flags, codestring, constants, names, varnames, filename, name, firstlineno, lnotab[, freevars[, cellvars]]) Create a code object.
Not for the faint of heart.\end{verbatim} \end{quote} In the case of my \texttt{do{\_}nothing} function, the code object possesses the following attributes: \begin{quote} \begin{verbatim}>>> print pretty(attributes(co)) co_argcount = 0 co_cellvars = () co_code = dS co_consts = (None,) co_filename = oopp.py co_firstlineno = 48 co_flags = 15 co_freevars = () co_lnotab = co_name = do_nothing co_names = () co_nlocals = 2 co_stacksize = 1 co_varnames = ('args', 'kw')\end{verbatim} \end{quote} Some of these arguments are pretty technical and implementation dependent; however, some of these are pretty clear and useful: \begin{quote} \begin{itemize} \item {} co{\_}argcount is the total number of arguments \item {} co{\_}filename is the name of the file where the function is defined \item {} co{\_}firstlineno is the line number where the function is defined \item {} co{\_}name is the name of the function \item {} co{\_}varnames are the names \end{itemize} \end{quote} The programmer that it is not a ``faint of heart'' can study the built-in documentation on code objects; s/he should try \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{for~k,v~in~attributes(co).iteritems():~print~k,':',v.{\_}{\_}doc{\_}{\_},'{\textbackslash}n'} \end{flushleft}\end{ttfamily} {\#} does not work now !! \begin{ttfamily}\begin{flushleft} \mbox{add=[lambda~x,i=i:~x+i~for~i~in~range(10)]}\\ \mbox{}\\ \mbox{>>>~def~f(y):}\\ \mbox{...~~~~return~lambda~x:~x+y}\\ \mbox{...}\\ \mbox{>>>~f(1).func{\_}closure~{\#}closure~cell~object}\\ \mbox{(,)} \end{flushleft}\end{ttfamily} \end{quote} func.defaults, closure, etc. {\#}how to extract (non-default) arguments as help does. print (lambda:None).func{\_}code.co{\_}filename One cannot change the name of a function: \begin{quote} \begin{verbatim}>>> def f(): pass ... >>> f.__name__='ciao' # error Traceback (most recent call last): File "", line 1, in ? 
TypeError: readonly attribute\end{verbatim} \end{quote} However, one can create a copy with a different name: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~copyfunc(f,newname=None):~{\#}~works~under~Python~2.3}\\ \mbox{~~~~if~newname~is~None:~newname=f.func{\_}name~{\#}~same~name}\\ \mbox{~~~~return~FunctionType(f.func{\_}code,~globals(),~newname,~}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~f.func{\_}defaults,~f.func{\_}closure)}\\ \mbox{}\\ \mbox{{\#}}\\ \mbox{}\\ \mbox{>>>~copyfunc(f,newname='f2')}\\ \mbox{} \end{flushleft}\end{ttfamily} \end{quote} Notice that the \texttt{copy} module would not do the job: \begin{quote} \begin{verbatim}>>> import copy >>> copy.copy(f) # error Traceback (most recent call last): File "", line 1, in ? File "/usr/local/lib/python2.3/copy.py", line 84, in copy y = _reconstruct(x, reductor(), 0) File "/usr/local/lib/python2.3/copy_reg.py", line 57, in _reduce raise TypeError, "can't pickle %s objects" % base.__name__ TypeError: can't pickle function objects\end{verbatim} \end{quote} %___________________________________________________________________________ \hypertarget{the-beauty-of-objects}{} \pdfbookmark[0]{THE BEAUTY OF OBJECTS}{the-beauty-of-objects} \section*{THE BEAUTY OF OBJECTS} In this chapter I will show how to define generic objects in Python, and how to manipulate them. %___________________________________________________________________________ \hypertarget{user-defined-objects}{} \pdfbookmark[1]{User defined objects}{user-defined-objects} \subsection*{User defined objects} In Python, one cannot directly modify methods and attributes of built-in types, since this would be a potentially frightening source of bugs. Imagine for instance of changing the sort method of a list and invoking an external module expecting the standard sort: all kind of hideous outcome could happen. 
Nevertheless, in Python, as in all OOP languages, the user can define her own kinds of objects, customized to satisfy her needs. In order to define a new object, the user must define the class of the objects she needs. The simplest possible class is a do-nothing class: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~Object(object):}\\ \mbox{~~~~"A~convenient~Object~class"}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Elements of the \texttt{Object} class can be created (instantiated) quite simply: \begin{quote} \begin{verbatim}>>> from oopp import Object >>> obj1=Object() >>> obj1 >>> obj2=Object() obj2 \end{verbatim} \end{quote} Notice that the hexadecimal number 0x81580ec is nothing else than the unique object reference to \texttt{obj1} \begin{quote} \begin{verbatim}>>> hex(id(obj1)) '0x81580ec'\end{verbatim} \end{quote} whereas 0x8156704 is the object reference of \texttt{obj2}: \begin{quote} \begin{verbatim}>>> hex(id(obj2)) '0x8156704'\end{verbatim} \end{quote} However, at this point \texttt{obj1} and \texttt{obj2} are generic do-nothing objects. Nevertheless, they have at least one useful attribute, the class docstring: \begin{quote} \begin{verbatim}>>> obj1.__doc__ #obj1 docstring 'A convenient Object class' >>> obj2.__doc__ # obj2 docstring: it's the same 'A convenient Object class'\end{verbatim} \end{quote} Notice that the docstring is associated with the class and therefore all the instances share the same docstring, unless one explicitly assigns a different docstring to some instance. \texttt{{\_}{\_}doc{\_}{\_}} is a class attribute (or a static attribute for readers familiar with the C++/Java terminology) and the class definition above is actually syntactic sugar for \begin{quote} \begin{verbatim}>>> class Object(object): # with explicit assignement to __doc__ ... 
__doc__ = "A convenient Object class"\end{verbatim} \end{quote} Since instances of 'Object' can be modified, I can transform them in anything I want. For instance, I can create a simple clock: \begin{quote} \begin{verbatim}>>> myclock=Object() >>> myclock <__main__.Object object at 0x8124614>\end{verbatim} \end{quote} A minimal clock should at least print the current time on the system. This is given by the \texttt{get{\_}time} function we defined in the first chapter. We may ``attach'' that function to our clock as follows: \begin{quote} \begin{verbatim}>>> import oopp >>> myclock.get_time=oopp.get_time >>> myclock.get_time # this is a function, not a method \end{verbatim} \end{quote} In other words, we have converted the \texttt{oopp.get{\_}time} function to a \texttt{get{\_}time} function of the object \texttt{myclock}. The procedure works \begin{quote} \begin{verbatim}>>> myclock.get_time() '15:04:57'\end{verbatim} \end{quote} but has a disadvantage: if we instantiate another clock \begin{quote} \begin{verbatim}>>> from oopp import Object >>> otherclock=Object()\end{verbatim} \end{quote} the other clock will \texttt{not} have a get{\_}time method: \begin{quote} \begin{verbatim}>>> otherclock.get_time() #first attempt; error AttributeError: 'Object' object has no attribute 'get_time'\end{verbatim} \end{quote} Notice instead that the docstring is a \emph{class attribute}, i.e. it is defined both for the class and \emph{all instances} of the class, therefore even for \texttt{otherclock}: \begin{quote} \begin{verbatim}>>> Object.__doc__ 'A convenient Object class' >>> otherclock.__doc__ 'A convenient Object class'\end{verbatim} \end{quote} We would like to convert the \texttt{get{\_}time} function to a \texttt{get{\_}time} method for the \emph{entire} class 'Object', i.e. for all its instances. 
Naively, one would be tempted to write the following: \begin{quote} \begin{verbatim}>>> Object.get_time=oopp.get_time\end{verbatim} \end{quote} However this would not work: \begin{quote} \begin{verbatim}>>> otherclock.get_time() #second attempt; still error Traceback (most recent call last): File "<stdin>", line 1, in ? TypeError: oopp.get_time() takes no arguments (1 given)\end{verbatim} \end{quote} This error message is something that all Python beginners encounter (and sometimes even non-beginners ;-). The solution is to introduce an additional argument: \begin{quote} \begin{verbatim}>>> Object.get_time=lambda self : oopp.get_time() >>> otherclock.get_time # this is a method now, not a function <bound method Object.<lambda> of <__main__.Object object at 0x815881c>> >>> otherclock.get_time() #third attempt '15:28:41'\end{verbatim} \end{quote} Why does this work? The explanation is the following: when Python encounters an expression of the form \texttt{objectname.methodname()} it checks whether there is already a method \emph{attached} to the object: \begin{quote} \newcounter{listcnt16} \begin{list}{\alph{listcnt16}.} { \usecounter{listcnt16} \setlength{\rightmargin}{\leftmargin} } \item {} if yes it invokes it with no arguments (this is why our first example worked); \item {} if not it looks at the class of the object; if there is a method bound to the class it invokes that method \emph{by passing the object as first argument}. \end{list} \end{quote} When we invoked \texttt{otherclock.get{\_}time()} in our second attempt, Python found that the function \texttt{get{\_}time} was defined at the class level, and sent it the \texttt{otherclock} object as first argument: however \texttt{get{\_}time} was bound to \texttt{oopp.get{\_}time}, which is a function with \emph{no} arguments: hence the error message. The third attempt worked since, thanks to the lambda function trick, the \texttt{get{\_}time} function has been converted to a function accepting a first argument.
So here is the rule: in Python, one can define methods at the class level, provided one explicitly introduces a first argument containing the object on which the method is invoked. This first argument is traditionally called \texttt{self}; the name 'self' is not enforced, one could use any other valid Python identifier, however the convention is so widespread that practically everybody uses it; pychecker will even raise a warning if you don't follow the convention. I have just shown one of the most interesting features of Python, its \emph{dynamicity}: you can create the class first and add methods to it later. That logic cannot be followed in a typical compiled language such as C++. On the other hand, one can also define methods in a static, more traditional way: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{"Shows~how~to~define~methods~inside~the~class~(statically)"}\\ \mbox{}\\ \mbox{import~oopp}\\ \mbox{}\\ \mbox{class~Clock(object):}\\ \mbox{~~~~'Clock~class;~version~0.1'}\\ \mbox{~~~~def~get{\_}time(self):~{\#}~method~defined~inside~the~class}\\ \mbox{~~~~~~~~return~oopp.get{\_}time()}\\ \mbox{}\\ \mbox{myclock=Clock()~{\#}creates~a~Clock~instance}\\ \mbox{print~myclock.get{\_}time()~{\#}~print~the~current~time}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} In this case we have defined the \texttt{get{\_}time} method inside the class as a normal function with an explicit first argument called self; this is entirely equivalent to the use of a lambda function. The syntax \texttt{myclock.get{\_}time()} is actually syntactic sugar for \texttt{Clock.get{\_}time(myclock)}. In this second form, it is clear that \texttt{get{\_}time} is really ``attached'' to the class, not to the instance.
%___________________________________________________________________________ \hypertarget{objects-have-static-methods-and-classmethods}{} \pdfbookmark[1]{Objects have static methods and classmethods}{objects-have-static-methods-and-classmethods} \subsection*{Objects have static methods and classmethods} \begin{quote} \begin{flushleft} \emph{There~should~be~one--and~preferably~only~one--obvious~way~to~do~it}~\\ --~Tim~Peters,~\emph{The~Zen~of~Python}. \end{flushleft} \end{quote} For any rule, there is an exception, and despite Python's motto there are many ways to define methods in classes. The way I presented before was the obvious one before the Python 2.2 revolution; however, nowadays there is another possibility that, even if less obvious, has the advantage of some elegance (and it is also slightly more efficient, even if efficiency is never a primary concern for a Python programmer). We see that the first argument in the \texttt{get{\_}time} method is useless, since the time is computed from the \texttt{time.asctime()} function which does not require any information about the object that is calling it. This waste is ugly, and since according to the Zen of Python \begin{quote} \emph{Beautiful is better than ugly.} \end{quote} we should look for another way. The solution is to use a \emph{static method}: when a static method is invoked, the calling object is \emph{not} implicitly passed as first argument.
Therefore we may use a normal function with no additional first argument to define the \texttt{get{\_}time} method: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~Clock(object):}\\ \mbox{~~~~'Clock~with~a~staticmethod'}\\ \mbox{~~~~get{\_}time=staticmethod(get{\_}time)}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Here is how it works: \begin{quote} \begin{verbatim}>>> from oopp import Clock >>> Clock().get_time() # get_time is bound both to instances '10:34:23' >>> Clock.get_time() # and to the class '10:34:26'\end{verbatim} \end{quote} The staticmethod idiom converts the plain \texttt{get{\_}time} function to a static method of the class 'Clock'. Notice that one can also define the function directly inside the class body, using the (arguably more Pythonic) idiom \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{def~get{\_}time():}\\ \mbox{~~~~return~oopp.get{\_}time()}\\ \mbox{get{\_}time=staticmethod(get{\_}time)} \end{flushleft}\end{ttfamily} \end{quote} as the documentation suggests: \begin{quote} \begin{verbatim}>>> print staticmethod.__doc__ staticmethod(function) -> method Convert a function to be a static method. A static method does not receive an implicit first argument. To declare a static method, use this idiom: class C: def f(arg1, arg2, ...): ... f = staticmethod(f) It can be called either on the class (e.g. C.f()) or on an instance (e.g. C().f()). The instance is ignored except for its class. Static methods in Python are similar to those found in Java or C++. For a more advanced concept, see the classmethod builtin.\end{verbatim} \end{quote} At present the notation for static methods is still rather ugly, but it is expected to improve in future versions of Python (probably in Python 2.4). Documentation for static methods can be found in Guido's essay and in the relevant PEPs; however, these are intended for developers.
As the docstring says, static methods are also ``attached'' to the class and may be called with the syntax \texttt{Clock.get{\_}time()}. A similar remark applies to the so-called \emph{classmethods}: \begin{quote} \begin{verbatim}>>> print classmethod.__doc__ classmethod(function) -> method Convert a function to be a class method. A class method receives the class as implicit first argument, just like an instance method receives the instance. To declare a class method, use this idiom: class C: def f(cls, arg1, arg2, ...): ... f = classmethod(f) It can be called either on the class (e.g. C.f()) or on an instance (e.g. C().f()). The instance is ignored except for its class. If a class method is called for a derived class, the derived class object is passed as the implied first argument. Class methods are different than C++ or Java static methods. If you want those, see the staticmethod builtin.\end{verbatim} \end{quote} When a regular method is invoked, a reference to the calling object is implicitly passed as first argument; instead, when a static method is invoked, no reference to the calling object is passed. As the docstring says, classmethods are convenient when one wants to pass to a method the calling \emph{class}, not the calling object. Here is an example: \begin{quote} \begin{verbatim}>>> class Clock(object): pass >>> Clock.name=classmethod(lambda cls: cls.__name__) >>> Clock.name() # called by the class 'Clock' >>> Clock().name() # called by an instance 'Clock'\end{verbatim} \end{quote} Notice that classmethods (and staticmethods too) can only be attached to classes, not to objects: \begin{quote} \begin{verbatim}>>> class Clock(object): pass >>> c=Clock() >>> c.name=classmethod(lambda cls: cls.__name__) >>> c.name() #error Traceback (most recent call last): File "<stdin>", line 1, in ? TypeError: 'classmethod' object is not callable\end{verbatim} \end{quote} The call fails with a TypeError.
The reason is that classmethods and staticmethods are implemented through \emph{attribute descriptors}. This concept will be discussed in detail in the forthcoming chapter 6. Notice that classmethods are not providing any fundamental feature, since one could very well use a normal method and retrieve the class with \texttt{self.{\_}{\_}class{\_}{\_}} as we did in the first chapter. Therefore, we could live without them (actually, I think they are a non-essential complication to the language). Nevertheless, now that we have them, we can use them, since they come in handy in various circumstances, as we will see in the following. %___________________________________________________________________________ \hypertarget{objects-have-their-privacy}{} \pdfbookmark[1]{Objects have their privacy}{objects-have-their-privacy} \subsection*{Objects have their privacy} In some situations, it is convenient to give to the developer some information that should be hidden from the final user. To this aim Python uses private names (i.e. names starting with a single underscore) and private/protected attributes (i.e. attributes starting with a double underscore). Consider for instance the following script: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{import~time}\\ \mbox{}\\ \mbox{class~Clock(object):}\\ \mbox{~~~~{\_}{\_}secret="This~Clock~is~quite~stupid."}\\ \mbox{}\\ \mbox{myclock=Clock()}\\ \mbox{try:~print~myclock.{\_}{\_}secret}\\ \mbox{except~Exception,e:~print~"AttributeError:",e}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} The output of this script is \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{AttributeError:~'Clock'~object~has~no~attribute~'{\_}{\_}secret'} \end{flushleft}\end{ttfamily} \end{quote} Therefore, even if the Clock object \emph{does} have a \texttt{{\_}{\_}secret} attribute, the user cannot access it!
In this way she cannot discover that actually ``This Clock is quite stupid.'' In other programming languages, attributes like \texttt{{\_}{\_}secret} are called ``private'' attributes. However, in Python private attributes are not really private and their secrets can be accessed with very little effort. First of all, we may notice that \texttt{myclock} really contains a secret by using the builtin function \texttt{dir()}: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{dir(myclock)}\\ \mbox{['{\_}Clock{\_}{\_}secret',~'{\_}{\_}class{\_}{\_}',~'{\_}{\_}delattr{\_}{\_}',~'{\_}{\_}dict{\_}{\_}',~'{\_}{\_}doc{\_}{\_}',~}\\ \mbox{~'{\_}{\_}getattribute{\_}{\_}',~'{\_}{\_}hash{\_}{\_}',~'{\_}{\_}init{\_}{\_}',~'{\_}{\_}module{\_}{\_}',~'{\_}{\_}new{\_}{\_}',~}\\ \mbox{~'{\_}{\_}reduce{\_}{\_}',~'{\_}{\_}repr{\_}{\_}',~'{\_}{\_}setattr{\_}{\_}',~'{\_}{\_}str{\_}{\_}',~'{\_}{\_}weakref{\_}{\_}']} \end{flushleft}\end{ttfamily} \end{quote} We see that the first attribute of myclock is \texttt{{\_}Clock{\_}{\_}secret}, which we may access directly: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{print~myclock.{\_}Clock{\_}{\_}secret}\\ \mbox{This~Clock~is~quite~stupid.} \end{flushleft}\end{ttfamily} \end{quote} We see here the secret of private variables in Python: the \emph{name mangling}. When Python sees a name starting with two underscores (and not ending with two underscores, otherwise it would be interpreted as a special attribute), internally it manages it as \texttt{{\_}Classname{\_}{\_}privatename}. Notice that if 'Classname' begins with underscores, the leading underscores are stripped in such a way as to guarantee that the private name starts with only \emph{one} underscore. For instance, the '{\_}{\_}secret' private attribute of classes such as 'Clock', '{\_}Clock', '{\_}{\_}Clock', '{\_}{\_}{\_}Clock', etc. is mangled to '{\_}Clock{\_}{\_}secret'. Private names in Python are \emph{not} intended to keep secrets: they have other uses.
\newcounter{listcnt17} \begin{list}{\arabic{listcnt17}.} { \usecounter{listcnt17} \setlength{\rightmargin}{\leftmargin} } \item {} On one hand, private names are a suggestion to the developer. When the Python programmer sees a name starting with one or two underscores in a program written by others, she understands that name should not be of concern for the final user, but it only concerns the internal implementation. \item {} On the other hand, private names are quite useful in class inheritance, since they provide safety with respect to the overriding operation. This point will be discussed in the next chapter. \item {} Names starting with one (or more) underscores are not imported by the statement \texttt{from module import *}. \end{list} Remark: it makes no sense to define names with double underscores outside classes, since the name mangling doesn't work in this case. Let me show an example: \begin{quote} \begin{verbatim}>>> class Clock(object): __secret="This Clock is quite stupid" >>> def tellsecret(self): return self.__secret >>> Clock.tellsecret=tellsecret >>> Clock().tellsecret() #error Traceback (most recent call last): File "<stdin>", line 1, in ? File "<stdin>", line 2, in tellsecret AttributeError: 'Clock' object has no attribute '__secret'\end{verbatim} \end{quote} The explanation is that since \texttt{tellsecret()} is defined outside the class, \texttt{{\_}{\_}secret} is not expanded to \texttt{{\_}Clock{\_}{\_}secret} and therefore cannot be retrieved, whereas \begin{quote} \begin{verbatim}>>> class Clock(object): ... __secret="This Clock is quite stupid" ... def tellsecret(self): return self.__secret >>> Clock().tellsecret() This Clock is quite stupid\end{verbatim} \end{quote} will work. In other words, private variables are attached to classes, not objects.
%___________________________________________________________________________ \hypertarget{objects-have-properties}{} \pdfbookmark[1]{Objects have properties}{objects-have-properties} \subsection*{Objects have properties} In the previous section we have shown that private variables are of little use for keeping secrets: if a developer really wants to restrict the access to some methods or attributes, she has to resort to \emph{properties}. Let me show an example: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{import~oopp}\\ \mbox{}\\ \mbox{class~Clock(object):~}\\ \mbox{~~~~'Clock~class~with~a~secret'}\\ \mbox{~~}\\ \mbox{~~~~you{\_}know{\_}the{\_}pw=False~{\#}default}\\ \mbox{~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~def~give{\_}pw(self,pw):}\\ \mbox{~~~~~~~~"""Check~if~you~know~the~password.~For~security,~one~should~crypt}\\ \mbox{~~~~~~~~the~password."""}\\ \mbox{~~~~~~~~self.you{\_}know{\_}the{\_}pw=(pw=="xyz")}\\ \mbox{~~~~~~}\\ \mbox{~~~~def~get{\_}secret(self):}\\ \mbox{~~~~~~~~if~self.you{\_}know{\_}the{\_}pw:}\\ \mbox{~~~~~~~~~~~~return~"This~clock~doesn't~work."}\\ \mbox{~~~~~~~~else:}\\ \mbox{~~~~~~~~~~~~return~"You~must~give~the~right~password~to~access~'secret'"}\\ \mbox{~~~~~~}\\ \mbox{~~~~secret=property(get{\_}secret)}\\ \mbox{}\\ \mbox{c=Clock()}\\ \mbox{print~c.secret~{\#}~=>~You~must~give~the~right~password~to~access~'secret'}\\ \mbox{c.give{\_}pw('xyz')~{\#}~gives~the~right~password}\\ \mbox{print~c.secret~{\#}~=>~This~clock~doesn't~work.}\\ \mbox{print~Clock.secret~{\#}~=>~<property~object~at~0x...>}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} In this script, one wants to restrict the access to the attribute 'secret', which can be accessed only if the user provides the correct password. Obviously, this example is not very secure, since I have hard-coded the password 'xyz' in the source code, which is easily accessible.
In reality, one should encrypt the password and perform a more sophisticated test than the trivial check \texttt{(pw=="xyz")}; anyway, the example is only intended to show the uses of properties, not to be really secure. The key action is performed by the descriptor class \texttt{property} that converts the function \texttt{get{\_}secret} into a property object. Additional information on the usage of \texttt{property} can be obtained from the docstring: \begin{quote} \begin{verbatim}>>> print property.__doc__ property(fget=None, fset=None, fdel=None, doc=None) -> property attribute fget is a function to be used for getting an attribute value, and likewise fset is a function for setting, and fdel a function for del'ing, an attribute. Typical use is to define a managed attribute x: class C(object): def getx(self): return self.__x def setx(self, value): self.__x = value def delx(self): del self.__x x = property(getx, setx, delx, "I'm the 'x' property.")\end{verbatim} \end{quote} Properties are another example of attribute descriptors. %___________________________________________________________________________ \hypertarget{objects-have-special-methods}{} \pdfbookmark[1]{Objects have special methods}{objects-have-special-methods} \subsection*{Objects have special methods} From the beginning, we stressed that objects have special attributes that may come in handy, as for instance the docstring \texttt{{\_}{\_}doc{\_}{\_}} and the class name attribute \texttt{{\_}{\_}class{\_}{\_}}. They have special methods, too. With little doubt, the most useful special method is the \texttt{{\_}{\_}init{\_}{\_}} method, which \emph{initializes} an object right after its creation. \texttt{{\_}{\_}init{\_}{\_}} is typically used to pass parameters to \emph{object factories}.
Let me show an example with geometric figures: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{~~}\\ \mbox{class~GeometricFigure(object):~{\#}an~example~of~object~factory}\\ \mbox{~~~~"""This~class~allows~to~define~geometric~figures~according~to~their}\\ \mbox{~~~~equation~in~the~cartesian~plane.~It~will~be~extended~later."""}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(self,equation,**parameters):}\\ \mbox{~~~~~~~~"Specify~the~cartesian~equation~of~the~object~and~its~parameters"}\\ \mbox{~~~~~~~~self.eq=equation}\\ \mbox{~~~~~~~~self.par=parameters}\\ \mbox{~~~~~~~~for~k,v~in~self.par.items():~{\#}replaces~the~parameters~in~the~equation}\\ \mbox{~~~~~~~~~~~~self.eq=self.eq.replace(k,str(v))}\\ \mbox{~~~~~~~~self.contains=eval('lambda~x,y~:~'+self.eq)~}\\ \mbox{~~~~~~~~{\#}~dynamically~creates~the~function~'contains'}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Here is how it works: \begin{quote} \begin{verbatim}>>> from oopp import * >>> disk=GeometricFigure('(x-x0)**2+(y-y0)**2 <= r**2', x0=0,y0=0,r=5) >>> # creates a disk of radius 5 centered in the origin >>> disk.contains(1,2) #asks if the point (1,2) is inside the disk True >>> disk.contains(4,4) #asks if the point (4,4) is inside the disk False\end{verbatim} \end{quote} Let me continue the section on special methods with some observations on \texttt{{\_}{\_}repr{\_}{\_}} and \texttt{{\_}{\_}str{\_}{\_}}. Notice that I will not discuss all the subtleties; for a thorough discussion, see the thread ``Using {\_}{\_}repr{\_}{\_} or {\_}{\_}str{\_}{\_}'' in c.l.p. (Google is your friend). The following discussion applies to new style classes; old style classes are subtly different. When one writes \begin{quote} \begin{verbatim}>>> disk <oopp.GeometricFigure object at 0x...>\end{verbatim} \end{quote} one obtains the \emph{string representation} of the object.
Actually, the previous line is syntactic sugar for \begin{quote} \begin{verbatim}>>> print repr(disk) <oopp.GeometricFigure object at 0x...>\end{verbatim} \end{quote} or \begin{quote} \begin{verbatim}>>> print disk.__repr__() <oopp.GeometricFigure object at 0x...>\end{verbatim} \end{quote} The \texttt{repr} function extracts the string representation from the special method \texttt{{\_}{\_}repr{\_}{\_}}, which can be redefined in order to have objects pretty printed. Notice that \texttt{repr} is conceptually different from the \texttt{str} function that controls the output of the \texttt{print} statement. Actually, \texttt{print o} is syntactic sugar for \texttt{print str(o)} which is sugar for \texttt{print o.{\_}{\_}str{\_}{\_}()}. If for instance we define \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~PrettyPrinted(object):}\\ \mbox{~~~~formatstring='{\%}s'~{\#}~default}\\ \mbox{~~~~def~{\_}{\_}str{\_}{\_}(self):}\\ \mbox{~~~~~~~~"""Returns~the~name~of~self~in~quotes,~possibly~formatted~via~}\\ \mbox{~~~~~~~~self.formatstring.~If~self~has~no~name,~returns~the~name~}\\ \mbox{~~~~~~~~of~its~class~in~angular~brackets."""~}\\ \mbox{~~~~~~~~try:~{\#}look~if~the~object~has~a~name~}\\ \mbox{~~~~~~~~~~~name="'{\%}s'"~{\%}~self.{\_}{\_}name{\_}{\_}~}\\ \mbox{~~~~~~~~except~AttributeError:~{\#}if~not,~use~the~name~of~its~class}\\ \mbox{~~~~~~~~~~~~name='<{\%}s>'~{\%}~type(self).{\_}{\_}name{\_}{\_}}\\ \mbox{~~~~~~~~if~hasattr(self,'formatstring'):}\\ \mbox{~~~~~~~~~~~~return~self.formatstring~{\%}~name}\\ \mbox{~~~~~~~~else:~}\\ \mbox{~~~~~~~~~~~~return~name}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} then we have \begin{quote} \begin{verbatim}>>> from oopp import PrettyPrinted >>> o=PrettyPrinted() # o is an instance of PrettyPrinted >>> print o #invokes o.__str__() which in this case returns o.__class__.name <PrettyPrinted>\end{verbatim} \end{quote} whereas \begin{quote} \begin{verbatim}>>> o # i.e.
print repr(o) <oopp.PrettyPrinted object at 0x...>\end{verbatim} \end{quote} However, in most cases \texttt{{\_}{\_}repr{\_}{\_}} and \texttt{{\_}{\_}str{\_}{\_}} give the same output, since if \texttt{{\_}{\_}str{\_}{\_}} is not explicitly defined it defaults to \texttt{{\_}{\_}repr{\_}{\_}}. Therefore, whereas modifying \texttt{{\_}{\_}str{\_}{\_}} does not change \texttt{{\_}{\_}repr{\_}{\_}}, modifying \texttt{{\_}{\_}repr{\_}{\_}} changes \texttt{{\_}{\_}str{\_}{\_}}, if \texttt{{\_}{\_}str{\_}{\_}} is not explicitly given: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{"{\_}{\_}repr{\_}{\_}~can~also~be~a~regular~method,~not~a~classmethod"}\\ \mbox{}\\ \mbox{class~Frog(object):}\\ \mbox{~~~~attributes="poor,~small,~ugly"}\\ \mbox{~~~~def~{\_}{\_}str{\_}{\_}(self):}\\ \mbox{~~~~~~~~return~"I~am~a~"+self.attributes+'~'+self.{\_}{\_}class{\_}{\_}.{\_}{\_}name{\_}{\_}}\\ \mbox{}\\ \mbox{class~Prince(object):}\\ \mbox{~~~~attributes='rich,~tall,~beautiful'}\\ \mbox{~~~~def~{\_}{\_}str{\_}{\_}(self):}\\ \mbox{~~~~~~~~return~"I~am~a~"+self.attributes+'~'+self.{\_}{\_}class{\_}{\_}.{\_}{\_}name{\_}{\_}}\\ \mbox{}\\ \mbox{jack=Frog();~print~repr(jack),jack}\\ \mbox{charles=Prince();~print~repr(charles),charles~~}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} The output of this script is: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{~I~am~a~poor,~small,~ugly~Frog}\\ \mbox{~I~am~a~rich,~tall,~beautiful~Prince} \end{flushleft}\end{ttfamily} \end{quote} for jack and charles respectively. \texttt{{\_}{\_}str{\_}{\_}} and \texttt{{\_}{\_}repr{\_}{\_}} are also called by the formatting operators ``{\%}s'' and ``{\%}r''.
Notice that i) \texttt{{\_}{\_}repr{\_}{\_}} can be most naturally rewritten as a class method; ii) Python is magic: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{~~}\\ \mbox{"""Shows~two~things:~}\\ \mbox{~~~~1)~redefining~{\_}{\_}repr{\_}{\_}~automatically~changes~the~output~of~{\_}{\_}str{\_}{\_}}\\ \mbox{~~~~2)~the~class~of~an~object~can~be~dynamically~changed!~"""}\\ \mbox{}\\ \mbox{class~Frog(object):}\\ \mbox{~~~~attributes="poor,~small,~ugly"}\\ \mbox{~~~~def~{\_}{\_}repr{\_}{\_}(cls):}\\ \mbox{~~~~~~~~return~"I~am~a~"+cls.attributes+'~'+cls.{\_}{\_}name{\_}{\_}}\\ \mbox{~~~~{\_}{\_}repr{\_}{\_}=classmethod({\_}{\_}repr{\_}{\_})}\\ \mbox{}\\ \mbox{class~Prince(object):}\\ \mbox{~~~~attributes='rich,~tall,~beautiful'}\\ \mbox{~~~~def~{\_}{\_}repr{\_}{\_}(cls):}\\ \mbox{~~~~~~~~return~"I~am~a~"+cls.attributes+'~'+cls.{\_}{\_}name{\_}{\_}}\\ \mbox{~~~~{\_}{\_}repr{\_}{\_}=classmethod({\_}{\_}repr{\_}{\_})}\\ \mbox{}\\ \mbox{def~princess{\_}kiss(frog):}\\ \mbox{~~~~~~frog.{\_}{\_}class{\_}{\_}=Prince}\\ \mbox{}\\ \mbox{jack=Frog()}\\ \mbox{princess{\_}kiss(jack)}\\ \mbox{print~jack~{\#}~the~same~as~repr(jack)}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Now the output for jack is ``I am a rich, tall, beautiful Prince''! In Python you may dynamically change the class of an object!! Of course, this is a feature to use with care ;-) There are many other special methods, such as {\_}{\_}new{\_}{\_}, {\_}{\_}getattr{\_}{\_}, {\_}{\_}setattr{\_}{\_}, etc. They will be discussed in the next chapters, in conjunction with inheritance. %___________________________________________________________________________ \hypertarget{objects-can-be-called-added-subtracted}{} \pdfbookmark[1]{Objects can be called, added, subtracted, ...}{objects-can-be-called-added-subtracted} \subsection*{Objects can be called, added, subtracted, ...} Python provides a nice generalization of functions, via the concept of \emph{callable objects}.
A callable object is an object with a \texttt{{\_}{\_}call{\_}{\_}} special method. They can be used to define ``functions'' that remember how many times they are invoked: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~MultiplyBy(object):}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(self,n):}\\ \mbox{~~~~~~~~self.n=n}\\ \mbox{~~~~~~~~self.counter=0}\\ \mbox{~~~~def~{\_}{\_}call{\_}{\_}(self,x):}\\ \mbox{~~~~~~~~self.counter+=1}\\ \mbox{~~~~~~~~return~self.n*x}\\ \mbox{}\\ \mbox{double=MultiplyBy(2)}\\ \mbox{res=double(double(3))~{\#}~res=12}\\ \mbox{print~"double~is~callable:~{\%}s"~{\%}~callable(double)}\\ \mbox{print~"You~have~called~double~{\%}s~times."~{\%}~double.counter}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} With output \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{double~is~callable:~True}\\ \mbox{You~have~called~double~2~times.} \end{flushleft}\end{ttfamily} \end{quote} The script also shows that callable objects (including functions) can be recognized with the \texttt{callable} built-in function. Callable objects elegantly solve the problem of having ``static'' variables inside functions (cfr. with the 'double' example in chapter 2). A class with a \texttt{{\_}{\_}call{\_}{\_}} method can be used to generate an entire set of customized ``functions''. For this reason, callable objects are especially useful in conjunction with object factories.
Let me show an application to my factory of geometric figures: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~Makeobj(object):}\\ \mbox{~~~~"""A~factory~of~object~factories.~Makeobj(cls)~returns~instances}\\ \mbox{~~~~~of~cls"""}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(self,cls,*args):}\\ \mbox{~~~~~~~~self.cls=cls}\\ \mbox{~~~~~~~~self.args=args}\\ \mbox{~~~~def~{\_}{\_}call{\_}{\_}(self,**pars):}\\ \mbox{~~~~~~~~return~self.cls(*self.args,**pars)}\\ \mbox{}\\ \mbox{{\#}}\\ \mbox{}\\ \mbox{{\#}}\\ \mbox{}\\ \mbox{from~oopp~import~Makeobj,GeometricFigure}\\ \mbox{}\\ \mbox{makedisk=Makeobj(GeometricFigure,'(x-x0)**2+(y-y0)**2~<=~r**2')}\\ \mbox{makesquare=Makeobj(GeometricFigure,'abs(x-x0)~<=~L~and~abs(y-y0)~<=~L')}\\ \mbox{disk=makedisk(x0=0,y0=0,r=10)~{\#}~a~disk~of~radius~10}\\ \mbox{square=makesquare(x0=0,y0=0,L=10)~{\#}~a~square~of~half-side~10}\\ \mbox{}\\ \mbox{print~disk.contains(9,9)~{\#}~=>~False}\\ \mbox{print~square.contains(9,9)~{\#}~=>~True}\\ \mbox{{\#}etc.}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} This factory generates callable objects, such as \texttt{makedisk} and \texttt{makesquare}, which return geometric objects. It gives a nicer interface to the object factory provided by 'GeometricFigure'. Notice that using the expression \texttt{disk.contains(9,9)} in order to know if the point of coordinates (9,9) is contained in the disk is rather inelegant: it would be much better to be able to ask if \texttt{(9,9) in disk}. This is indeed possible: the secret is to define the special method \texttt{{\_}{\_}contains{\_}{\_}}.
This is done in the next example, that I think give a good taste of the beauty of objects \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{from~oopp~import~Makeobj}\\ \mbox{}\\ \mbox{Nrow=50;~Ncol=78}\\ \mbox{~~}\\ \mbox{class~GeometricFigure(object):}\\ \mbox{~~~~"""This~class~allows~to~define~geometric~figures~according~to~their}\\ \mbox{~~~~equation~in~the~cartesian~plane.~Moreover~addition~and~subtraction}\\ \mbox{~~~~of~geometric~figures~are~defined~as~union~and~subtraction~of~sets."""}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(self,equation,**parameters):}\\ \mbox{~~~~~~~~"Initialize~"}\\ \mbox{~~~~~~~~self.eq=equation}\\ \mbox{~~~~~~~~self.par=parameters}\\ \mbox{~~~~~~~~for~(k,v)~in~self.par.items():~{\#}replaces~the~parameters}\\ \mbox{~~~~~~~~~~~~self.eq=self.eq.replace(k,str(v))}\\ \mbox{~~~~~~~~self.contains=eval('lambda~x,y~:~'+self.eq)}\\ \mbox{~~~~def~combine(self,fig,operator):}\\ \mbox{~~~~~~~~"""Combine~self~with~the~geometric~figure~fig,~using~the}\\ \mbox{~~~~~~~~operators~"or"~(addition)~and~"and~not"~(subtraction)"""}\\ \mbox{~~~~~~~~comboeq="("+self.eq+")"+operator+"("+fig.eq+")"}\\ \mbox{~~~~~~~~return~GeometricFigure(comboeq)}\\ \mbox{~~~~def~{\_}{\_}add{\_}{\_}(self,fig):}\\ \mbox{~~~~~~~~"Union~of~sets"}\\ \mbox{~~~~~~~~return~self.combine(fig,'~or~')}\\ \mbox{~~~~def~{\_}{\_}sub{\_}{\_}(self,fig):}\\ \mbox{~~~~~~~~"Subtraction~of~sets"}\\ \mbox{~~~~~~~~return~self.combine(fig,'~and~not')}\\ \mbox{~~~~def~{\_}{\_}contains{\_}{\_}(self,point):~{\#}point~is~a~tuple~(x,y)}\\ \mbox{~~~~~~~~return~self.contains(*point)}\\ \mbox{}\\ \mbox{makedisk=Makeobj(GeometricFigure,'(x-x0)**2/4+(y-y0)**2~<=~r**2')}\\ \mbox{upperdisk=makedisk(x0=38,y0=7,r=5)}\\ \mbox{smalldisk=makedisk(x0=38,y0=30,r=5)}\\ \mbox{bigdisk=makedisk(x0=38,y0=30,r=14)}\\ \mbox{}\\ \mbox{def~format(text,shape):}\\ \mbox{~~~~"Format~the~text~in~the~shape~given~by~figure"}\\ \mbox{~~~~text=text.replace('{\textbackslash}n','~')}\\ 
\mbox{~~~~out=[];~i=0;~col=0;~row=0;~L=len(text)}\\ \mbox{~~~~while~1:}\\ \mbox{~~~~~~~~if~(col,row)~in~shape:}\\ \mbox{~~~~~~~~~~~~out.append(text[i]);~i+=1}\\ \mbox{~~~~~~~~~~~~if~i==L:~break}\\ \mbox{~~~~~~~~else:}\\ \mbox{~~~~~~~~~~~~out.append("~")}\\ \mbox{~~~~~~~~if~col==Ncol-1:}\\ \mbox{~~~~~~~~~~~~col=0;~out.append('{\textbackslash}n')~{\#}~starts~new~row}\\ \mbox{~~~~~~~~~~~~if~row==Nrow-1:~row=0~~~{\#}~starts~new~page}\\ \mbox{~~~~~~~~~~~~else:~row+=1}\\ \mbox{~~~~~~~~else:~col+=1~}\\ \mbox{~~~~return~''.join(out)}\\ \mbox{}\\ \mbox{composition=bigdisk-smalldisk+upperdisk}\\ \mbox{print~format(text='Python~Rules!'*95,shape=composition)}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} I leave as an exercise for the reader to understand how does it work and to play with other geometric figures (he can also generate them trough the 'Makeobj' factory). I think it is nicer to show its output: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~~Pyt~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~hon~Rules!Pyt~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~~hon~Rules!Python~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~Rules!Python~Rules!~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~Python~Rules!Python~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~Rules!Python~Rules!P~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~ython~Rules!Python~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~Rules!Python~Rules!~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~~Python~Rules!Pyth~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~on~Rules!Pyth~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~~on~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ 
\mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~~Rul~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~es!Python~Rules!Pytho~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~n~Rules!Python~Rules!Python~R~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~ules!Python~Rules!Python~Rules!Pyth~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~on~Rules!Python~Rules!Python~Rules!Pyth~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~on~Rules!Python~Rules!Python~Rules!Python~R~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~ules!Python~Rules!Python~Rules!Python~Rules!Pyt~~~~~~~~~~~~~~~~}\\ \mbox{~~~~hon~Rules!Python~Rules!Python~Rules!Python~Rules!~~~~~~~~~~~~~~~}\\ \mbox{~~~Python~Rules!Python~Rules!Python~Rules!Python~Rules~~~~~~~~~~~~~~}\\ \mbox{~~!Python~Rules!Python~Rule~~~s!Python~Rules!Python~Rul~~~~~~~~~~~~~}\\ \mbox{~~es!Python~Rules!Pyth~~~~~~~~~~~~~on~Rules!Python~Rule~~~~~~~~~~~~~}\\ \mbox{~s!Python~Rules!Pyth~~~~~~~~~~~~~~~~~on~Rules!Python~Rul~~~~~~~~~~~~}\\ \mbox{~es!Python~Rules!Py~~~~~~~~~~~~~~~~~~~thon~Rules!Python~~~~~~~~~~~~~}\\ \mbox{~Rules!Python~Rules~~~~~~~~~~~~~~~~~~~!Python~Rules!Pyth~~~~~~~~~~~~}\\ \mbox{on~Rules!Python~Ru~~~~~~~~~~~~~~~~~~~~~les!Python~Rules!P~~~~~~~~~~~}\\ \mbox{~ython~Rules!Python~~~~~~~~~~~~~~~~~~~~Rules!Python~Rule~~~~~~~~~~~~}\\ \mbox{~s!Python~Rules!Pyt~~~~~~~~~~~~~~~~~~~hon~Rules!Python~R~~~~~~~~~~~~}\\ \mbox{~ules!Python~Rules!P~~~~~~~~~~~~~~~~~ython~Rules!Python~~~~~~~~~~~~~}\\ \mbox{~~Rules!Python~Rules!P~~~~~~~~~~~~~ython~Rules!Python~R~~~~~~~~~~~~~}\\ \mbox{~~ules!Python~Rules!Python~~~~Rules!Python~Rules!Python~~~~~~~~~~~~~}\\ \mbox{~~~~Rules!Python~Rules!Python~Rules!Python~Rules!Pytho~~~~~~~~~~~~~~}\\ \mbox{~~~~n~Rules!Python~Rules!Python~Rules!Python~Rules!Py~~~~~~~~~~~~~~~}\\ \mbox{~~~~~thon~Rules!Python~Rules!Python~Rules!Python~Rul~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~es!Python~Rules!Python~Rules!Python~Rules!P~~~~~~~~~~~~~~~~~~}\\ 
\mbox{~~~~~~~~~ython~Rules!Python~Rules!Python~Rules!P~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~ython~Rules!Python~Rules!Python~Rul~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~es!Python~Rules!Python~Rules!~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~Python~Rules!Python~R~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~~ule~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~~s!} \end{flushleft}\end{ttfamily} \end{quote} Remark. Unfortunately, ``funnyformatter.py'' does not reuse old code: in spite of the fact that we already had in our library the 'GeometricFigure' class, with an ``{\_}{\_}init{\_}{\_}'' method that is exactly the same of the ``{\_}{\_}init{\_}{\_}'' method in ``funnyformatter.py'', we did not reuse that code. We simply did a cut and paste. This means that if we later find a bug in the \texttt{{\_}{\_}init{\_}{\_}} method, we will have to fix it twice, both in the script and in the library. Also, if we plan to extend the method later, we will have to extend it twice. Fortunately, this nasty situation can be avoided: but this requires the power of inheritance. %___________________________________________________________________________ \hypertarget{the-power-of-classes}{} \pdfbookmark[0]{THE POWER OF CLASSES}{the-power-of-classes} \section*{THE POWER OF CLASSES} This chapter is devoted to the concept of class inheritance. 
I will discuss single inheritance, cooperative methods, multiple inheritance and more.
%___________________________________________________________________________
\hypertarget{the-concept-of-inheritance}{} \pdfbookmark[1]{The concept of inheritance}{the-concept-of-inheritance} \subsection*{The concept of inheritance} Inheritance is perhaps the most important basic feature in OOP, since it allows the reuse and incremental improvement of old code. To show this point, let me come back to one of the examples I have introduced in the last chapter, the 'fairytale1.py' script, where I defined the classes 'Frog' and 'Prince' as \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{class~Frog(object):}\\ \mbox{~~~~attributes="poor,~small,~ugly"}\\ \mbox{~~~~def~{\_}{\_}str{\_}{\_}(self):}\\ \mbox{~~~~~~~~return~"I~am~a~"+self.attributes+'~'+self.{\_}{\_}class{\_}{\_}.{\_}{\_}name{\_}{\_}}\\ \mbox{}\\ \mbox{class~Prince(object):}\\ \mbox{~~~~attributes='rich,~tall,~beautiful'}\\ \mbox{~~~~def~{\_}{\_}str{\_}{\_}(self):}\\ \mbox{~~~~~~~~return~"I~am~a~"+self.attributes+'~'+self.{\_}{\_}class{\_}{\_}.{\_}{\_}name{\_}{\_}} \end{flushleft}\end{ttfamily} \end{quote} We see that the way we followed here was very bad since: \newcounter{listcnt18} \begin{list}{\arabic{listcnt18}.} { \usecounter{listcnt18} \setlength{\rightmargin}{\leftmargin} } \item {} The \texttt{{\_}{\_}str{\_}{\_}} method is duplicated both in Frog and in Prince: that means that if we find a bug later, we have to fix it twice! \item {} The \texttt{{\_}{\_}str{\_}{\_}} method was already defined in the PrettyPrinted class (actually more elegantly), therefore we have triplicated the work and worsened the situation! \end{list} This is very much against the whole philosophy of OOP: \begin{quote} \emph{never cut and paste!} \end{quote} We should \emph{reuse} old code, not paste it! The solution is \emph{class inheritance}.
The idea behind inheritance is to define new classes as subclasses of \emph{parent} classes, in such a way that the \emph{children} classes possess all the features of the parents. That means that we do not need to redefine the properties of the parents explicitly. In this example, we may derive both 'Frog' and 'Prince' from the 'PrettyPrinted' class, thus providing to both 'Frog' and 'Prince' the \texttt{PrettyPrinted.{\_}{\_}str{\_}{\_}} method with no effort: \begin{quote} \begin{verbatim}>>> from oopp import PrettyPrinted >>> class Frog(PrettyPrinted): attributes="poor, small, ugly" ... >>> class Prince(PrettyPrinted): attributes="rich, tall, beautiful" ... >>> print repr(Frog()), Frog() <__main__.Frog object at 0x401cbeac> >>> print Prince() >>> print repr(Prince()),Prince() <__main__.Prince object at 0x401cbaac> \end{verbatim} \end{quote} Let me show explicitly that both 'Frog' and 'Prince' share the 'PrettyPrinted.{\_}{\_}str{\_}{\_}' method: \begin{quote} \begin{verbatim}>>> id(Frog.__str__) # of course, YMMV 1074329476 >>> id(Prince.__str__) 1074329476 >>> id(PrettyPrinted.__str__) 1074329476\end{verbatim} \end{quote} The method is always the same, since the object reference is the same (the precise value of the reference is not guaranteed to be 1074329476, however!). This example is good to show the first advantage of inheritance: \emph{avoiding duplication of code}. Another advantage of inheritance is \emph{extensibility}: one can very easily improve existing code. For instance, having written the \texttt{Clock} class once, I can reuse it in many different ways. For example, I can build a \texttt{Timer} to be used for benchmarks.
It is enough to reuse the function \texttt{with{\_}timer} introduced in the first chapter (functions are good for reuse of code, too ;): \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~Timer(Clock):}\\ \mbox{~~~~"Inherits~the~get{\_}time~staticmethod~from~Clock"}\\ \mbox{~~~~execute=staticmethod(with{\_}timer)}\\ \mbox{~~~~loop{\_}overhead=staticmethod(loop{\_}overhead)}\\ \mbox{}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Here is an example of application: \begin{quote} \begin{verbatim}>>> from oopp import Timer >>> Timer.get_time() '16:07:06'\end{verbatim} \end{quote} Therefore 'Timer' inherits 'Clock.get{\_}time'; moreover it has the additional method \texttt{execute}: \begin{quote} \begin{verbatim}>>> def square(x): return x*x ... >>> Timer.execute(square,n=100000)(1) executing square ... Real time: 0.01 ms CPU time: 0.008 ms\end{verbatim} \end{quote} The advantage of putting the function \texttt{execute} in a class is that now we may \emph{inherit} from that class and improve our timer \emph{ad libitum}.
%___________________________________________________________________________
\hypertarget{inheritance-versus-run-time-class-modifications}{} \pdfbookmark[1]{Inheritance versus run-time class modifications}{inheritance-versus-run-time-class-modifications} \subsection*{Inheritance versus run-time class modifications} Naively, one could think of substituting inheritance with run-time modification of classes, since this is allowed by Python. However, this is not such a good idea, in general. Let me give a simple example. Suppose we want to improve our previous clock, to show the date, too.
We could reach that goal with the following script: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{"Shows~how~to~modify~and~enhances~classes~on~the~fly"}\\ \mbox{}\\ \mbox{from~oopp~import~*}\\ \mbox{}\\ \mbox{clock=Clock()~{\#}creates~a~Clock~instance}\\ \mbox{print~clock.get{\_}time()~{\#}~print~the~current~time}\\ \mbox{}\\ \mbox{get{\_}data=lambda~:~'~'.join(time.asctime().split()[0:3])+~{\textbackslash}}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~'~'+time.asctime().split()[-1]}\\ \mbox{}\\ \mbox{get{\_}data{\_}and{\_}time=lambda~:~"Today~is:~{\%}s~{\textbackslash}nThe~time~is:~{\%}s"~{\%}~(}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~get{\_}data(),get{\_}time())~{\#}~enhances~get{\_}time}\\ \mbox{}\\ \mbox{Clock.get{\_}time=staticmethod(get{\_}data{\_}and{\_}time)}\\ \mbox{}\\ \mbox{print~clock.get{\_}time()~{\#}~print~the~current~time~and~data}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} The output of this script is: \begin{quote} 12:51:25 Today is: Sat Feb 22 2003 The time is: 12:51:25 \end{quote} Notice that: \newcounter{listcnt19} \begin{list}{\arabic{listcnt19}.} { \usecounter{listcnt19} \setlength{\rightmargin}{\leftmargin} } \item {} I instantiated the \texttt{clock} object \emph{before} redefining the \texttt{get{\_}time} method, when it could only print the time and \emph{not} the date. \item {} However, after the redefinition of the class, the behaviour of all its instances is changed, \emph{including the behaviour of objects instantiated before the change!} Then \texttt{clock} \emph{can} print the date, too. \end{list} This is not so surprising, once you recognize that Guido owns a very famous time-machine ... ;-) Seriously, the reason is that an object does not contain a reserved copy of the attributes and methods of its class: it only contains \emph{references} to them. If we change them in the class, the references to them in the object will stay the same, but the contents will change.
In this example, I have solved the problem of enhancing the 'Clock' class without inheritance, but by dynamically replacing its \texttt{get{\_}time} (static) method with the \texttt{get{\_}data{\_}and{\_}time} (static) method. The dynamic modification of methods can be cool, but it should be avoided whenever possible, at least for two reasons [\hyperlink{id24}{12}]: \newcounter{listcnt20} \begin{list}{\arabic{listcnt20}.} { \usecounter{listcnt20} \setlength{\rightmargin}{\leftmargin} } \item {} having a class and therefore all its instances (including the instances created before the modification!) changed during the life-time of the program can be very confusing to the programmer, if not to the interpreter. \item {} the modification is destructive: I cannot have the old \texttt{get{\_}time} method and the new one at the same time, unless one explicitly gives it a new name (and giving new names increases the pollution of the namespace). \end{list} Both these disadvantages can be solved by resorting to the mechanism of inheritance.
For instance, in this example, we can derive a new class \texttt{NewClock} from \texttt{Clock} as follows: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{import~oopp,time}\\ \mbox{}\\ \mbox{get{\_}data=lambda~:~'~'.join(time.asctime().split()[0:3])+~{\textbackslash}}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~'~'+time.asctime().split()[-1]}\\ \mbox{}\\ \mbox{get{\_}data{\_}and{\_}time=lambda~:~"Today~is:~{\%}s~{\textbackslash}nThe~time~is:~{\%}s"~{\%}~(}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~get{\_}data(),oopp.get{\_}time())~{\#}~enhances~get{\_}time}\\ \mbox{}\\ \mbox{class~NewClock(oopp.Clock):}\\ \mbox{~~~~~~~"""NewClock~is~a~class~that~inherits~from~Clock,~provides~get{\_}data}\\ \mbox{~~~~~~~~and~overrides~get{\_}time."""}\\ \mbox{~~~~~~~get{\_}data=staticmethod(get{\_}data)}\\ \mbox{~~~~~~~get{\_}time=staticmethod(get{\_}data{\_}and{\_}time)}\\ \mbox{}\\ \mbox{clock=oopp.Clock();~print~'clock~output=',clock.get{\_}time()~}\\ \mbox{newclock=NewClock();~print~'newclock~output=',newclock.get{\_}time()}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} The output of this script is: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{clock~output=~16:29:17}\\ \mbox{newclock~output=~Today~is:~Sat~Feb~22~2003~}\\ \mbox{The~time~is:~16:29:17} \end{flushleft}\end{ttfamily} \end{quote} We see that the two problems previously discussed are solved since: \newcounter{listcnt21} \begin{list}{\roman{listcnt21})} { \usecounter{listcnt21} \setlength{\rightmargin}{\leftmargin} } \item {} there is no cut and paste: the old method \texttt{Clock.get{\_}time()} is used in the definition of the new method \texttt{NewClock.get{\_}time()}; \item {} the old method is still accessible as \texttt{Clock.get{\_}time()}; there is no need to invent a new name like \texttt{get{\_}time{\_}old()}. \end{list} We say that the method \texttt{get{\_}time} in \texttt{NewClock} \emph{overrides} the method \texttt{get{\_}time} in Clock. 
This simple example shows the power of inheritance in code reuse, but there is more than that. Inheritance is everywhere in Python, since all classes inherit from object. This means that all classes inherit the methods and attributes of the object class, such as \texttt{{\_}{\_}doc{\_}{\_}}, \texttt{{\_}{\_}class{\_}{\_}}, \texttt{{\_}{\_}str{\_}{\_}}, etc. \begin{quote} \begin{figure}[b]\hypertarget{id24}[12] There are cases when run-time modifications of classes are useful anyway: particularly when one wants to modify the behavior of classes written by others without changing the source code. I will show an example in the next chapter. \end{figure} \end{quote}
%___________________________________________________________________________
\hypertarget{inheriting-from-built-in-types}{} \pdfbookmark[1]{Inheriting from built-in types}{inheriting-from-built-in-types} \subsection*{Inheriting from built-in types} However, one can subclass a built-in type, effectively creating a user-defined type with all the features of a built-in type, and modify it. Suppose for instance one has a keyword dictionary such as \begin{quote} \begin{verbatim}>>> kd={'title': "OOPP", 'author': "M.S.", 'year': 2003}\end{verbatim} \end{quote} it would be nice to be able to access the attributes without excessive quoting, i.e. using \texttt{kd.author} instead of \texttt{kd["author"]}.
This can be done by subclassing the built-in class \texttt{dict} and by overriding the \texttt{{\_}{\_}getattr{\_}{\_}} and \texttt{{\_}{\_}setattr{\_}{\_}} special methods: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~kwdict(dict):}\\ \mbox{~~~~"Keyword~dictionary~base~class"}\\ \mbox{~~~~def~{\_}{\_}getattr{\_}{\_}(self,attr):~}\\ \mbox{~~~~~~~~return~self[attr]}\\ \mbox{~~~~def~{\_}{\_}setattr{\_}{\_}(self,key,val):~}\\ \mbox{~~~~~~~~self[key]=val}\\ \mbox{~~~~{\_}{\_}str{\_}{\_}~=~pretty~}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Here is an example of usage: \begin{quote} \begin{verbatim}>>> from oopp import kwdict >>> book=kwdict({'title': "OOPP", 'author': "M.S."}) >>> book.author #it works 'M.S.' >>> book["author"] # this also works 'M.S.' >>> book.year=2003 #you may also add new fields on the fly >>> print book author = M.S. title = OOPP year = 2003\end{verbatim} \end{quote} The advantage of subclassing the built-in 'dict' is that you have for free all the standard dictionary methods, without having to reimplement them. However, subclassing built-in types is not always a piece of cake. In many cases there are complications, indeed.
Suppose for instance one wants to create an enhanced string type, with the ability of indent and dedent a block of text provided by the following functions: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~indent(block,n):}\\ \mbox{~~~~~"Indent~a~block~of~code~by~n~spaces"}\\ \mbox{~~~~~return~'{\textbackslash}n'.join(['~'*n+line~for~line~in~block.splitlines()])}\\ \mbox{}\\ \mbox{def~dedent(block):}\\ \mbox{~~~~"Dedent~a~block~of~code,~if~need~there~is"""}\\ \mbox{~~~~lines=block.splitlines()}\\ \mbox{~~~~for~line~in~lines:}\\ \mbox{~~~~~~~~strippedline=line.lstrip()}\\ \mbox{~~~~~~~~if~strippedline:~break}\\ \mbox{~~~~spaces=len(line)-len(strippedline)}\\ \mbox{~~~~if~not~spaces:~return~block}\\ \mbox{~~~~return~'{\textbackslash}n'.join([line[spaces:]~for~line~in~lines])}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} The solution is to inherit from the built-in string type \texttt{str}, and to add to the new class the \texttt{indent} and \texttt{dedent} methods: \begin{quote} \begin{verbatim}>>> from oopp import indent,dedent >>> class Str(str): ... indent=indent ... dedent=dedent >>> s=Str('spam\neggs') >>> type(s) >>> print s.indent(4) spam eggs\end{verbatim} \end{quote} However, this approach has a disadvantage, since the output of \texttt{indent} is not a \texttt{Str}, but a normal \texttt{str}, therefore without the additional \texttt{indent} and \texttt{dedent} methods: \begin{quote} \begin{verbatim}>>> type(s.indent(4)) >>> s.indent(4).indent(4) #error Traceback (most recent call last): File "", line 9, in ? AttributeError: 'str' object has no attribute 'indent' >>> s.indent(4).dedent(4) #error Traceback (most recent call last): File "", line 9, in ? AttributeError: 'str' object has no attribute 'dedent'\end{verbatim} \end{quote} We would like \texttt{indent} to return a \texttt{Str} object. 
To solve this problem it is enough to rewrite the class as follows: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{from~oopp~import~indent,dedent}\\ \mbox{}\\ \mbox{class~Str(str):}\\ \mbox{~~~def~indent(self,n):}\\ \mbox{~~~~~~~return~Str(indent(self,n))}\\ \mbox{~~~def~dedent(self):}\\ \mbox{~~~~~~~return~Str(dedent(self))}\\ \mbox{}\\ \mbox{s=Str('spam{\textbackslash}neggs').indent(4)}\\ \mbox{print~type(s)}\\ \mbox{print~s~{\#}~indented~s}\\ \mbox{s=s.dedent()}\\ \mbox{print~type(s)}\\ \mbox{print~s~{\#}~non-indented~s}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Now, everything works and the output of the previous script is \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{}\\ \mbox{~~~~spam}\\ \mbox{~~~~eggs}\\ \mbox{}\\ \mbox{spam}\\ \mbox{eggs} \end{flushleft}\end{ttfamily} \end{quote} The solution works because now \texttt{indent()} returns an instance of \texttt{Str}, which therefore has an \texttt{indent} method. Unfortunately, this is not the end. Suppose we want to add another food to our list: \begin{quote} \begin{verbatim}>>> s2=s+Str("\nham") >>> s2.indent(4) #error Traceback (most recent call last): File "", line 1, in ? AttributeError: 'str' object has no attribute 'indent'\end{verbatim} \end{quote} The problem is the same, again: the type of \texttt{s2} is \texttt{str} \begin{quote} \begin{verbatim}>>> type(s2) \end{verbatim} \end{quote} and therefore there is no \texttt{indent} method available. There is a solution to this problem, i.e. to redefine the addition operator for objects of the class \texttt{Str}. 
This can be done directly by hand, but it is \emph{ugly} for the following reasons: \newcounter{listcnt22} \begin{list}{\arabic{listcnt22}.} { \usecounter{listcnt22} \setlength{\rightmargin}{\leftmargin} } \item {} If you derive a new class from \texttt{Str} you have to redefine the addition operator (both the left addition and the right addition [\hyperlink{id26}{13}]) again (ughh!); \item {} There are other operators you must redefine, in particular the augmented assignment operator \texttt{+=}, the repetition operator \texttt{*} and its augmented version \texttt{*=}; \item {} In the case of numeric types, one must redefine, \texttt{+,-,*,/,//, mod,}, possibly \texttt{<<,>>} and others, including the corresponding augmented assignment operators and the left and the right form of the operators. \end{list} This is a mess, especially since due to point 1, one has to redefine all the operators each time she defines a new subclass. In short, one has to write a lot of boilerplate for a stupid job that the language should be able to perform itself automatically. But here is the good news: Python \emph{can} do all that automatically, in an elegant and beautiful way, which works for all types, too. But this requires the magic of metaclasses. \begin{quote} \begin{figure}[b]\hypertarget{id26}[13] The right addition works this way. Python looks at the expression x+y and if x has an explicit {\_}{\_}add{\_}{\_} method invokes it; on the other hand, if x does not define an {\_}{\_}add{\_}{\_} method, Python considers y+x. If y defines a {\_}{\_}radd{\_}{\_} method, it invokes it, otherwise it raises an exception. The same is done for right multiplication, etc.
\end{figure} \end{quote}
%___________________________________________________________________________
\hypertarget{controlling-the-creation-of-objects}{} \pdfbookmark[1]{Controlling the creation of objects}{controlling-the-creation-of-objects} \subsection*{Controlling the creation of objects} Before introducing multiple inheritance, let me make a short digression on the mechanism of object creation in Python 2.2+. The important point is that new style classes have a \texttt{{\_}{\_}new{\_}{\_}} static method that allows the user to take complete control of object creation. To understand how \texttt{{\_}{\_}new{\_}{\_}} works, I must explain what happens when an object is instantiated with a statement like \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{s=Str("spam")~{\#}object~creation} \end{flushleft}\end{ttfamily} \end{quote} What happens under the hood is that the special static method \texttt{{\_}{\_}new{\_}{\_}} of the class \texttt{Str} (inherited from the built-in \texttt{str} class) is invoked \emph{before} the \texttt{Str.{\_}{\_}init{\_}{\_}} method. This means that the previous line should really be considered syntactic sugar for: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{s=Str.{\_}{\_}new{\_}{\_}(Str,"spam")~{\#}~Str.{\_}{\_}new{\_}{\_}~is~actually~str.{\_}{\_}new{\_}{\_}}\\ \mbox{assert~isinstance(s,Str)}\\ \mbox{Str.{\_}{\_}init{\_}{\_}(s,"spam")~~{\#}~Str.{\_}{\_}init{\_}{\_}~is~actually~str.{\_}{\_}init{\_}{\_}} \end{flushleft}\end{ttfamily} \end{quote} Put more verbosely, what happens during the object creation is the following: \newcounter{listcnt23} \begin{list}{\arabic{listcnt23}.} { \usecounter{listcnt23} \setlength{\rightmargin}{\leftmargin} } \item {} the static method \texttt{{\_}{\_}new{\_}{\_}} is invoked with the class of the created object as first argument [\hyperlink{id29}{14}]; \item {} \texttt{{\_}{\_}new{\_}{\_}} returns an instance of that class.
\item {} the instance is then initialized by the \texttt{{\_}{\_}init{\_}{\_}} method. \end{list} Notice that both \texttt{{\_}{\_}new{\_}{\_}} and \texttt{{\_}{\_}init{\_}{\_}} are called with the same argument list, therefore one must make sure that they have a compatible signature. Let me discuss now why \texttt{{\_}{\_}new{\_}{\_}} must be a static method. First of all, it cannot be a normal method with a first argument which is an instance of the calling class, since at the time of \texttt{{\_}{\_}new{\_}{\_}} invocation that instance (\texttt{s} in the example) has still to be created. Since \texttt{{\_}{\_}new{\_}{\_}} needs information about the class calling it, one could think of implementing \texttt{{\_}{\_}new{\_}{\_}} as a class method. However, this would implicitly pass the caller class and return an instance of it. It is more convenient to have the ability to create instances of any class directly from \texttt{C.{\_}{\_}new{\_}{\_}(B,*args,**kw)}. For these reasons, \texttt{{\_}{\_}new{\_}{\_}} must be a static method, and the class which is calling it must be passed explicitly. Let me now show an important application of the \texttt{{\_}{\_}new{\_}{\_}} static method: forbidding object creation. For instance, sometimes it is useful to have classes that cannot be instantiated. This kind of class can be obtained by inheriting from a \texttt{NonInstantiable} class: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~NonInstantiableError(Exception):~}\\ \mbox{~~~~pass}\\ \mbox{}\\ \mbox{class~NonInstantiable(object):~}\\ \mbox{~~~~def~{\_}{\_}new{\_}{\_}(cls,*args,**kw):}\\ \mbox{~~~~~~~~raise~NonInstantiableError("{\%}s~cannot~be~instantiated"~{\%}~cls)}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Here is an example of usage: \begin{quote} \begin{verbatim}>>> from oopp import NonInstantiable,get_time >>> class Clock(NonInstantiable): ...
get_time=staticmethod(get_time) >>> Clock.get_time() # works '18:48:08' Clock() #error Traceback (most recent call last): File "", line 1, in ? Clock() File "oopp.py", line 257, in __new__ raise NonInstantiableError("%s cannot be instantiated" % cls) NonInstantiableError: cannot be instantiated\end{verbatim} \end{quote} However, the approach pursued here has a disadvantage: \texttt{Clock} was already defined as a subclass of \texttt{object} and I had to change the source code to make it a subclass of 'NonInstantiable'. But what happens if I cannot change the sources? How can I \emph{reuse} the old code? The solution is provided by multiple inheritance. Notice that '{\_}{\_}new{\_}{\_}' is a staticmethod: [\hyperlink{id30}{15}] \begin{quote} \begin{quote} \begin{verbatim}>>> type(NonInstantiable.__dict__['__new__']) \end{verbatim} \end{quote} \begin{figure}[b]\hypertarget{id29}[14] This is how \texttt{type(s)} or \texttt{s.{\_}{\_}class{\_}{\_}} get to know that \texttt{s} is an instance of \texttt{Str}, since the class information is explicitly passed to the newborn object through \texttt{{\_}{\_}new{\_}{\_}}. \end{figure} \begin{figure}[b]\hypertarget{id30}[15] However \texttt{object.{\_}{\_}dict{\_}{\_}['{\_}{\_}new{\_}{\_}']} is not a staticmethod: \begin{verbatim}>>> type(object.__dict__['__new__']) # special case \end{verbatim} \end{figure} \end{quote}
%___________________________________________________________________________
\hypertarget{multiple-inheritance}{} \pdfbookmark[1]{Multiple Inheritance}{multiple-inheritance} \subsection*{Multiple Inheritance} Multiple Inheritance (often abbreviated as MI) is often considered one of the most advanced topics in Object Oriented Programming. It is also one of the most difficult features to implement in an Object Oriented Programming language. Some languages even decided, by design, to avoid it.
This is for instance the case of Java, which avoided MI having seen its implementation in C++ (which is not for the faint of heart ;-) and uses a poorer form of it through interfaces. As far as the scripting languages are concerned, of which the most famous are Perl, Python and Ruby (in this order, even if the right order would be Python, Ruby and Perl), only Python implements Multiple Inheritance well (Ruby has a restricted form of it through mix-ins, whereas Perl's implementation is too difficult for me to understand what it does ;). The fact that Multiple Inheritance can be hairy does not mean that it is \emph{always} hairy, however. Multiple Inheritance is used with success in Lisp derived languages (including Dylan). The aim of this chapter is to discuss the Python support for MI in the most recent versions (2.2 and 2.3), which has considerably improved with respect to previous versions. The message is the following: if Python 1.5 had a basic support for MI (basic but nevertheless with nice features, dynamic), Python 2.2 has \emph{greatly} improved that support and with the change of the Method Resolution Order in Python 2.3, we may say that support for MI is now \emph{excellent}. I strongly encourage Python programmers to use MI a lot: this will allow an even stronger reuse of code than in single inheritance. Often, inheritance is used when one has a complicated class B, and she wants to modify (or enhance) its behavior, by deriving a child class C, which is only slightly different from B. In this situation, B is already a standalone class, providing some non-trivial functionality, independently from the existence of C. This kind of design is typical of the so called \emph{top-down} philosophy, where one builds the whole structure as a monolithic block, leaving room only for minor improvements. An alternative approach is the so called \emph{bottom-up} programming, in which one builds complicated things starting from very simple building blocks.
In this logic, the idea of creating classes with the only purpose of being derived is very appealing. The 'NonInstantiable' class just defined is a perfect example of this kind of classes, which are designed with multiple inheritance in mind and are often called \emph{mixin} classes. It can be used to create a new class \texttt{NonInstantiableClock} that inherits from \texttt{Clock} and from \texttt{NonInstantiable}. \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~NonInstantiableClock(Clock,NonInstantiable):~}\\ \mbox{~~~~pass}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Now \texttt{NonInstantiableClock} is both a clock \begin{quote} \begin{verbatim}>>> from oopp import NonInstantiableClock >>> NonInstantiableClock.get_time() # works '12:57:00' \end{verbatim} \end{quote} and a non-instantiable class: \begin{quote} \begin{verbatim}>>> NonInstantiableClock() # as expected, give an error Traceback (most recent call last): File "", line 1, in ? NonInstantiableClock() # error File "oopp.py", line 245, in __new__ raise NonInstantiableError("%s cannot be instantiated" % cls) NonInstantiableError: cannot be instantiated\end{verbatim} \end{quote} Let me give a simple example of a situation where the mixin approach comes in handy. Suppose that the owner of a 'Pizza-shop' needs a program to take care of all the pizzas to-go he sells. Pizzas are distinguished according to their size (small, medium or large) and their toppings.
The problem can be solved by inheriting from a generic pizza factory like this: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~GenericPizza(object):~{\#}~to~be~customized}\\ \mbox{~~~~toppinglist=[]~{\#}~nothing,~default~}\\ \mbox{~~~~baseprice=1~{\#}~one~dollar,~default}\\ \mbox{~~~~topping{\_}unit{\_}price=0.5~{\#}~half~dollar~for~each~topping,~default}\\ \mbox{~~~~sizefactor={\{}'small':1,~'medium':2,~'large':3{\}}~}\\ \mbox{~~~~{\#}~a~medium~size~pizza~costs~twice~a~small~pizza,~}\\ \mbox{~~~~{\#}~a~large~pizza~costs~three~times}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(self,size):}\\ \mbox{~~~~~~~~self.size=size}\\ \mbox{~~~~def~price(self):}\\ \mbox{~~~~~~~~return~(self.baseprice+}\\ \mbox{~~~~~~~~~~~~~~~self.toppings{\_}price())*self.sizefactor[self.size]}\\ \mbox{~~~~def~toppings{\_}price(self):}\\ \mbox{~~~~~~~~return~len(self.toppinglist)*self.topping{\_}unit{\_}price}\\ \mbox{~~~~def~{\_}{\_}str{\_}{\_}(self):}\\ \mbox{~~~~~~~~return~'{\%}s~pizza~with~{\%}s,~cost~{\$}~{\%}s'~{\%}~(self.size,}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~','.join(self.toppinglist),}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~self.price())}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Here the base class 'GenericPizza' is written with inheritance in mind: one can derive many pizza classes from it by overriding the \texttt{toppinglist}; for instance one could define \begin{quote} \begin{verbatim}>>> from oopp import GenericPizza >>> class Margherita(GenericPizza): ... toppinglist=['tomato']\end{verbatim} \end{quote} The problem with this approach is that one must define dozens of different pizza subclasses (Marinara, Margherita, Capricciosa, QuattroStagioni, Prosciutto, ProsciuttoFunghi, PizzaDellaCasa, etc. etc. [\hyperlink{id32}{16}]). In such a situation, it is better to perform the generation of subclasses in a smarter way, i.e. via a customizable class factory. 
A simpler approach is to use always the same class and to customize its instances just after creation. Both approaches can be implemented via the following 'Customizable' mixin class, not meant to be instantiated, but rather to be \emph{inherited}: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~Customizable(object):}\\ \mbox{~~~~"""Classes~inhering~from~'Customizable'~have~a~'with'~method~acting~as}\\ \mbox{~~~~an~object~modifier~and~'With'~classmethod~acting~as~a~class~factory"""}\\ \mbox{~~~~def~with(self,**kw):}\\ \mbox{~~~~~~~~customize(self,**kw){\#}~customize~the~instance}\\ \mbox{~~~~~~~~return~self~{\#}~returns~the~customized~instance}\\ \mbox{~~~~def~With(cls,**kw):}\\ \mbox{~~~~~~~~class~ChildOf(cls):~pass~{\#}~a~new~class~inheriting~from~cls}\\ \mbox{~~~~~~~~ChildOf.{\_}{\_}name{\_}{\_}=cls.{\_}{\_}name{\_}{\_}~{\#}~by~default,~with~the~same~name}\\ \mbox{~~~~~~~~customize(ChildOf,**kw)~~~~~~~{\#}~of~the~original~class}\\ \mbox{~~~~~~~~return~ChildOf}\\ \mbox{~~~~With=classmethod(With)~}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Descendants of 'Customizable' can be customized by using 'with', that directly acts on the instances, or 'With', that returns new classes. Notice that one could make 'With' to customize the original class, without returning a new one; however, in practice, this would not be safe: I remind that changing a class modifies automatically all its instances, even instances created \emph{before} the modification. This could produce bad surprises: it is better to returns new classes, that may have the same name of the original one, but are actually completely independent from it. 
In order to solve the pizza shop problem we may define a 'CustomizablePizza' class \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~CustomizablePizza(GenericPizza,Customizable):}\\ \mbox{~~~~pass}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} which can be used in two ways: i) to customize instances just after creation: \begin{quote} \begin{verbatim}>>> from oopp import CustomizablePizza >>> largepizza=CustomizablePizza('large') # CustomizablePizza instance >>> largemarinara=largepizza.with(toppinglist=['tomato'],baseprice=2) >>> print largemarinara large pizza with tomato, cost $ 7.5\end{verbatim} \end{quote} and ii) to generate customized new classes: \begin{quote} \begin{verbatim}>>> Margherita=CustomizablePizza.With( ... toppinglist=['tomato','mozzarella'], __name__='Margherita') >>> print Margherita('medium') medium pizza with tomato,mozzarella, cost $ 4.0\end{verbatim} \end{quote} The advantage of the bottom-up approach is that the 'Customizable' class can be reused in completely different problems; for instance, it could be used as a class factory. For instance we could use it to generate a 'CustomizableClock' class as in this example: \begin{quote} \begin{verbatim}>>> from oopp import * >>> CustomizableClock=Customizable.With(get_time=staticmethod(Clock.get_time), ... __name__='CustomizableClock') #adds get_time >>> CustomizableClock.get_time() # now it works '09:57:50'\end{verbatim} \end{quote} Here 'Customizable' ``steals'' the 'get{\_}time' method from 'Clock'. However that would be a rather perverse usage ;) I wrote it to show the advantage of classmethods, more than to suggest to the reader that this is an example of good programming. 
\begin{quote} \begin{figure}[b]\hypertarget{id32}[16] In Italy, you can easily find ``pizzerie'' with more than 50 different kinds of pizzas (once I saw a menu with something like one hundred different combinations ;) \end{figure} \end{quote} %___________________________________________________________________________ \hypertarget{cooperative-hierarchies}{} \pdfbookmark[1]{Cooperative hierarchies}{cooperative-hierarchies} \subsection*{Cooperative hierarchies} The examples of multiple inheritance hierarchies given until now were pretty easy. The reason is that there was no interaction between the methods of the children and of the parents. However, things get more complicated (and interesting ;) when the methods in the hierarchy call each other. Let me consider an example coming from paleoantropology: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~HomoHabilis(object):}\\ \mbox{~~~~def~can(self):}\\ \mbox{~~~~~~~~print~self,'can:'}\\ \mbox{~~~~~~~~print~"~-~make~tools"}\\ \mbox{}\\ \mbox{class~HomoSapiens(HomoHabilis):}\\ \mbox{~~~~def~can(self):~{\#}overrides~HomoHabilis.can}\\ \mbox{~~~~~~~~HomoHabilis.can(self)}\\ \mbox{~~~~~~~~print~"~-~make~abstractions"}\\ \mbox{~~~~~~}\\ \mbox{class~HomoSapiensSapiens(HomoSapiens):}\\ \mbox{~~~~def~can(self):~{\#}overrides~HomoSapiens.can}\\ \mbox{~~~~~~~~HomoSapiens.can(self)}\\ \mbox{~~~~~~~~print~"~-~make~art"}\\ \mbox{}\\ \mbox{modernman=HomoSapiensSapiens()}\\ \mbox{modernman.can()}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} In this example children methods call parent methods: 'HomoSapiensSapiens.can' calls 'HomoSapiens.can' that in turns calls 'HomoHabilis.can' and the final output is: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{<{\_}{\_}main{\_}{\_}.HomoSapiensSapiens~object~at~0x814e1fc>~can:}\\ \mbox{~-~make~tools}\\ \mbox{~-~make~abstractions}\\ \mbox{~-~make~art} \end{flushleft}\end{ttfamily} \end{quote} The script works, but it is far from 
ideal, if code reuse and refactoring are considered important requirements. The point is that (very likely, as the research in paleoanthropology progresses) we may want to extend the hierarchy, for instance by adding a class on the top or in the middle. In the present form, this would require a non-trivial modification of the source code (especially if one thinks that the hierarchy could be fleshed out with dozens of other methods and attributes). However, the aim of OOP is to avoid as much as possible source code modifications. This goal can be attained in practice, if the source code is written to be friendly to extensions and improvements as much as possible. I think it is worth spending some time improving this example, since what can be learned here can be lifted to real life cases. First of all, let me define a generic \emph{Homo} class, to be used as first ring of the inheritance chain (actually the first ring is 'object'): \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~Homo(PrettyPrinted):~}\\ \mbox{~~~~"""Defines~the~method~'can',~which~is~intended~to~be~overriden~}\\ \mbox{~~~~in~the~children~classes,~and~inherits~'{\_}{\_}str{\_}{\_}'~from~PrettyPrinted,}\\ \mbox{~~~~ensuring~a~nice~printing~representation~for~all~children."""}\\ \mbox{~~~~def~can(self):~}\\ \mbox{~~~~~~~~print~self,'can:'}\\ \mbox{~~~~}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Now, let me point out one of the shortcomings of the previous code: in each subclass, we explicitly call its parent class (also called super class) by its name. This is inconvenient, both because a change of name in later stages of the project would require a lot of search and replace (actually not a lot in this toy example, but you can imagine having a very big project with dozens of named method calls) and because it makes it difficult to insert a new element in the inheritance hierarchy. 
The solution to these problems is the \texttt{super} built-in, which provides an easy access to the methods of the superclass. \texttt{super} objects come in two flavors: \texttt{super(cls,obj)} objects return bound methods whereas \texttt{super(cls)} objects return unbound methods. In the next code we will use the first form. The hierarchy can more elegantly be rewritten as [\hyperlink{id34}{17}] : \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{from~oopp~import~Homo}\\ \mbox{}\\ \mbox{class~HomoHabilis(Homo):}\\ \mbox{~~~~def~can(self):}\\ \mbox{~~~~~~~~super(HomoHabilis,self).can()}\\ \mbox{~~~~~~~~print~"~-~make~tools"}\\ \mbox{}\\ \mbox{class~HomoSapiens(HomoHabilis):}\\ \mbox{~~~~def~can(self):}\\ \mbox{~~~~~~~~super(HomoSapiens,self).can()}\\ \mbox{~~~~~~~~print~"~-~make~abstractions"}\\ \mbox{~~~~~~}\\ \mbox{class~HomoSapiensSapiens(HomoSapiens):}\\ \mbox{~~~~def~can(self):}\\ \mbox{~~~~~~~~super(HomoSapiensSapiens,self).can()}\\ \mbox{~~~~~~~~print~"~-~make~art"}\\ \mbox{}\\ \mbox{}\\ \mbox{HomoSapiensSapiens().can()}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} with output \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{~can:}\\ \mbox{~-~make~tools}\\ \mbox{~-~make~abstractions}\\ \mbox{~-~make~art} \end{flushleft}\end{ttfamily} \end{quote} This is not yet the most elegant form, since even if \texttt{super} avoids naming the base class explicitly, it still requires explicitly naming the class where it is defined. This is rather annoying. Removing that restriction, i.e. implementing really anonymous \texttt{super} calls, is possible but requires a good understanding of private variables in inheritance. 
%___________________________________________________________________________ \hypertarget{inheritance-and-privacy}{} \pdfbookmark[1]{Inheritance and privacy}{inheritance-and-privacy} \subsection*{Inheritance and privacy} In order to define anonymous cooperative super calls, we need classes that know themselves, i.e. containing a reference to themselves. This is not as obvious a problem as it could seem, since it cannot be solved without incurring the biggest annoyance in inheritance: \emph{name clashing}. Name clashing happens when names and attributes defined in different ancestors override each other in an unwanted order. Name clashing is especially painful in the case of cooperative hierarchies and particularly in the problem at hand. A naive solution would be to attach a plain (i.e. non-private) attribute '.this' to the class, containing a reference to itself, that can be invoked by the methods of the class. Suppose, for instance, that I want to use that attribute in the \texttt{{\_}{\_}init{\_}{\_}} method of that class. A naive attempt would be to write something like: \begin{quote} \begin{verbatim}>>> class B(object): ... def __init__(self): ... print self.this,'.__init__' # .this defined later >>> B.this=B # B.this can be set only after B has been created >>> B() \end{verbatim} \end{quote} Unfortunately, this approach does not work with cooperative hierarchies. Consider, for instance, extending 'B' with a cooperative child class 'C' as follows: \begin{quote} \begin{verbatim}>>> class C(B): ... def __init__(self): ... super(self.this,self).__init__() # cooperative call ... 
print type(self).this,'.__init__' >>> C.this=C\end{verbatim} \end{quote} \texttt{C.{\_}{\_}init{\_}{\_}} calls \texttt{B.{\_}{\_}init{\_}{\_}} by passing a 'C' instance, therefore \texttt{C.this} is printed and not \texttt{B.this}: \begin{quote} \begin{verbatim}>>> C() .__init__ .__init__ <__main__.C object at 0x4042ca6c>\end{verbatim} \end{quote} The problem is that the \texttt{C.this} overrides \texttt{B.this}. The only way of avoiding the name clashing is to use a private attribute \texttt{.{\_}{\_}this}, as in the following script: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~B(object):}\\ \mbox{~~~~~def~{\_}{\_}init{\_}{\_}(self):~}\\ \mbox{~~~~~~~~print~self.{\_}{\_}this,'.{\_}{\_}init{\_}{\_}'}\\ \mbox{B.{\_}B{\_}{\_}this=B}\\ \mbox{}\\ \mbox{class~C(B):}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(self):}\\ \mbox{~~~~~~~super(self.{\_}{\_}this,self).{\_}{\_}init{\_}{\_}()~{\#}~cooperative~{\_}{\_}init{\_}{\_}~}\\ \mbox{~~~~~~~print~self.{\_}{\_}this,'.{\_}{\_}init{\_}{\_}'}\\ \mbox{C.{\_}C{\_}{\_}this=C}\\ \mbox{}\\ \mbox{C()}\\ \mbox{}\\ \mbox{{\#}~output:}\\ \mbox{{\#}~~.{\_}{\_}init{\_}{\_}}\\ \mbox{{\#}~~.{\_}{\_}init{\_}{\_}}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} The script works since, due to the magic of the mangling mechanism, in \texttt{B.{\_}{\_}init{\_}{\_}}, \texttt{self.{\_}B{\_}{\_}this} i.e. \texttt{B} is retrieved, whereas in \texttt{C.{\_}{\_}init{\_}{\_}} \texttt{self.{\_}C{\_}{\_}this} i.e. \texttt{C} is retrieved. The elegance of the mechanism can be improved with an helper function that makes its arguments reflective classes, i.e. 
classes with a \texttt{{\_}{\_}this} private attribute: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~reflective(*classes):}\\ \mbox{~~~~"""Reflective~classes~know~themselves,~i.e.~they~possess~a~private}\\ \mbox{~~~~attribute~{\_}{\_}this~containing~a~reference~to~themselves.~If~the~class}\\ \mbox{~~~~name~starts~with~'{\_}',~the~underscores~are~stripped."""}\\ \mbox{~~~~for~c~in~classes:}\\ \mbox{~~~~~~~~name=c.{\_}{\_}name{\_}{\_}~.lstrip('{\_}')~~{\#}~in~2.3}\\ \mbox{~~~~~~~~setattr(c,'{\_}{\%}s{\_}{\_}this'~{\%}~name,c)~}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} It is trivial to rewrite the paleonthropological hierarchy in terms of anonymous cooperative super calls by using this trick. \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~HomoHabilis(Homo):}\\ \mbox{~~~~def~can(self):}\\ \mbox{~~~~~~~~super(self.{\_}{\_}this,self).can()}\\ \mbox{~~~~~~~~print~"~-~make~tools"}\\ \mbox{}\\ \mbox{class~HomoSapiens(HomoHabilis):}\\ \mbox{~~~~def~can(self):}\\ \mbox{~~~~~~~~super(self.{\_}{\_}this,self).can()}\\ \mbox{~~~~~~~~print~"~-~make~abstractions"}\\ \mbox{~~~~~~}\\ \mbox{class~HomoSapiensSapiens(HomoSapiens):}\\ \mbox{~~~~def~can(self):}\\ \mbox{~~~~~~~~super(self.{\_}{\_}this,self).can()}\\ \mbox{~~~~~~~~print~"~-~make~art"}\\ \mbox{}\\ \mbox{reflective(HomoHabilis,HomoSapiens,HomoSapiensSapiens)}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Here there is an example of usage: \begin{quote} \begin{verbatim}>>> from oopp import * >>> man=HomoSapiensSapiens(); man.can() can: - make tools - make abstractions - make art\end{verbatim} \end{quote} We may understand why it works by looking at the attributes of man: \begin{quote} \begin{verbatim}>>> print pretty(attributes(man)) _HomoHabilis__this = _HomoSapiensSapiens__this = _HomoSapiens__this = can = > formatstring = %s\end{verbatim} \end{quote} It is also interesting to notice that the hierarchy can 
be entirely rewritten without using cooperative methods, but using private attributes, instead. This second approach is simpler, as the following script shows: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{from~oopp~import~PrettyPrinted,attributes,pretty}\\ \mbox{}\\ \mbox{class~Homo(PrettyPrinted):}\\ \mbox{~~~~def~can(self):}\\ \mbox{~~~~~~~~print~self,'can:'}\\ \mbox{~~~~~~~~for~attr,value~in~attributes(self).iteritems():~}\\ \mbox{~~~~~~~~~~~~if~attr.endswith('{\_}{\_}attr'):~print~value}\\ \mbox{class~HomoHabilis(Homo):~}\\ \mbox{~~~~{\_}{\_}attr="~-~make~tools"}\\ \mbox{class~HomoSapiens(HomoHabilis):~}\\ \mbox{~~~~{\_}{\_}attr="~-~make~abstractions"}\\ \mbox{class~HomoSapiensSapiens(HomoSapiens):~}\\ \mbox{~~~~{\_}{\_}attr="~-~make~art"}\\ \mbox{}\\ \mbox{modernman=HomoSapiensSapiens()}\\ \mbox{modernman.can()}\\ \mbox{print~'----------------------------------{\textbackslash}nAttributes~of',modernman}\\ \mbox{print~pretty(attributes(modernman))}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Here I have replaced the complicate chain of cooperative methods with much simpler private attributes. Only the 'can' method in the 'Homo' class survives, and it is modified to print the value of the '{\_}{\_}attr' attributes. Moreover, all the classes of the hierarchy have been made 'Customizable', in view of future extensions. The second script is much shorter and much more elegant than the original one, however its logic can be a little baffling, at first. 
The solution to the mystery is provided by the attribute dictionary of 'modernman', given by the second part of the output: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{~can:}\\ \mbox{~-~make~abstractions~~}\\ \mbox{~-~make~art~~}\\ \mbox{~-~make~tools~}\\ \mbox{------------------------------------------}\\ \mbox{Attributes~of~:}\\ \mbox{{\_}HomoHabilis{\_}{\_}attr~=~~-~make~tools}\\ \mbox{{\_}HomoSapiensSapiens{\_}{\_}attr~=~~-~make~art}\\ \mbox{{\_}HomoSapiens{\_}{\_}attr~=~~-~make~abstractions}\\ \mbox{can~=~>}\\ \mbox{formatstring~=~{\%}s} \end{flushleft}\end{ttfamily} \end{quote} We see that, in addition to the 'can' method inherited from 'Homo', the 'with' and 'With' methods inherited from 'Customizable' and the 'formatstring' inherited from 'PrettyPrinted', \texttt{modernman} has the attributes \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\_}HomoHabilis{\_}{\_}attr:'~-~make~tools'~{\#}~inherited~from~HomoHabilis}\\ \mbox{{\_}HomoSapiens{\_}{\_}attr:'~-~make~abstractions'{\#}~inherited~from~HomoSapiens}\\ \mbox{{\_}HomoSapiensSapiens{\_}{\_}attr:~'~-~make~art'~{\#}~inherited~from~HomoSapiensSapiens} \end{flushleft}\end{ttfamily} \end{quote} whose origin is obvious, once one recalls the mangling mechanism associated with private variables. The important point is that the trick would \emph{not} have worked for normal attributes. Had I used as variable name 'attr' instead of '{\_}{\_}attr', the name would have been overridden: the only attribute of 'HomoSapiensSapiens' would have been ' - make art'. This example explains the advantages of private variables during inheritance: they cannot be overridden. Using private names guarantees the absence of surprises due to inheritance. If a class B has only private variables, deriving a class C from B cannot cause name clashes. Private variables have drawbacks, too. 
The most obvious disadvantage is the fact that in order to customize private variables outside their defining class, one needs to pass explicitly the name of the class. For instance we could not change an attribute with the syntax \texttt{HomoHabilis.With({\_}{\_}attr=' - work the stone')}; we must write the more verbose, error prone and redundant \texttt{HomoHabilis.With({\_}HomoHabilis{\_}{\_}attr=' - work the stone')}. A subtler drawback will be discussed in chapter 6. \begin{quote} \begin{figure}[b]\hypertarget{id34}[17] In single inheritance hierarchies, \texttt{super} can be dismissed in favor of \texttt{{\_}{\_}base{\_}{\_}}: for instance, \texttt{super(HomoSapiens,self).can()} is equivalent to \texttt{HomoSapiens.{\_}{\_}base{\_}{\_}.can(self)}. Nevertheless, in view of possible extensions to multiple inheritance, using \texttt{super} is a much preferable choice. \end{figure} \end{quote} %___________________________________________________________________________ \hypertarget{the-sophistication-of-descriptors}{} \pdfbookmark[0]{THE SOPHISTICATION OF DESCRIPTORS}{the-sophistication-of-descriptors} \section*{THE SOPHISTICATION OF DESCRIPTORS} Attribute descriptors are important metaprogramming tools that allow the user to customize the behavior of attributes in custom classes. For instance, attribute descriptors (or descriptors for short) can be used as method wrappers, to modify or enhance methods (this is the case for the well known staticmethods and classmethods attribute descriptors); they can also be used as attribute wrappers, to change or restrict the access to attributes (this is the case for properties). Finally, descriptors allow the user to play with the resolution order of attributes: for instance, the \texttt{super} built-in object used in (multiple) inheritance hierarchies, is implemented as an attribute descriptor. 
In this chapter, I will show how users can define their own attribute descriptors and I will give some examples of useful things you can do with them (in particular to add tracing and timing capabilities). %___________________________________________________________________________ \hypertarget{motivation}{} \pdfbookmark[1]{Motivation}{motivation} \subsection*{Motivation} Attribute descriptors are a recent idea (they were first introduced in Python 2.2); nevertheless, under the hood, they are everywhere in Python. It is a tribute to Guido's ability of hiding Python complications that the average user can easily miss their existence. If you need to do simple things, you can very well live without the knowledge of descriptors. On the other hand, if you need difficult things (such as tracing all the attribute accesses of your modules) attribute descriptors allow you to perform impressive things. Let me start by showing why the knowledge of attribute descriptors is essential for any user seriously interested in metaprogramming applications. Suppose I want to trace the methods of a clock: \begin{quote} \begin{verbatim}>>> import oopp >>> clock=oopp.Clock()\end{verbatim} \end{quote} This is easily done with the \texttt{with{\_}tracer} closure of chapter 2: \begin{quote} \begin{verbatim}>>> oopp.wrapfunctions(clock,oopp.with_tracer) >>> clock.get_time() [] Calling 'get_time' with arguments (){} ... -> '.get_time' called with result: 19:55:07 '19:55:07'\end{verbatim} \end{quote} However, this approach fails if I try to trace the entire class: \begin{quote} \begin{verbatim}>>> oopp.wrapfunctions(oopp.Clock,oopp.with_tracer) >>> oopp.Clock.get_time() # error Traceback (most recent call last): File "", line 6, in ? TypeError: unbound method _() must be called with Clock instance as first argument (got nothing instead)\end{verbatim} \end{quote} The reason is that \texttt{wrapfunctions} sets the attributes of 'Clock' by invoking \texttt{customize}, which uses \texttt{setattr}. 
This converts '{\_}' (i.e. the traced version of \texttt{get{\_}time}) into a regular method, not into a staticmethod! In order to trace staticmethods, one has to understand the nature of attribute descriptors. %___________________________________________________________________________ \hypertarget{functions-versus-methods}{} \pdfbookmark[1]{Functions versus methods}{functions-versus-methods} \subsection*{Functions versus methods} Attribute descriptors are essential for the implementation of one of the most basic Python features: the automatic conversion of functions into methods. As I already anticipated in chapter 1, there is a sort of magic when one writes \texttt{Clock.get{\_}time=lambda self: get{\_}time()} and Python automagically converts the right hand side, that is a function, to a left hand side that is a (unbound) method. In order to understand this magic, one needs a better comprehension of the relation between functions and methods. Actually, this relationship is quite subtle and has no analogue in mainstream programming languages. For instance, C is not OOP and has only functions, lacking the concept of method, whereas Java (as other OOP languages) has no functions, only methods. C++ has functions and methods, but functions are completely different from methods. On the other hand, in Python, functions and methods can be transformed both ways. 
To show how it works, let me start by defining a simple printing function: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{import~{\_}{\_}main{\_}{\_}~{\#}~gives~access~to~the~{\_}{\_}main{\_}{\_}~namespace~from~the~module}\\ \mbox{}\\ \mbox{def~prn(s):}\\ \mbox{~~~~"""Given~an~evaluable~string,~print~its~value~and~its~object~reference.}\\ \mbox{~~~~Notice~that~the~evaluation~is~done~in~the~{\_}{\_}main{\_}{\_}~dictionary."""}\\ \mbox{~~~~try:~obj=eval(s,{\_}{\_}main{\_}{\_}.{\_}{\_}dict{\_}{\_})}\\ \mbox{~~~~except:~print~'problems~in~evaluating',s}\\ \mbox{~~~~else:~print~s,'=',obj,'at',hex(id(obj))}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Now, let me define a class with a method \texttt{m} equal to the identity function \texttt{f}: \begin{quote} \begin{verbatim}>>> def f(x): "Identity function"; return x ... >>> class C(object): ... m=f ... print m #here m is the function f \end{verbatim} \end{quote} We see that \emph{inside} its defining class, \texttt{m} coincides with the function \texttt{f} (the object reference is the same): \begin{quote} \begin{verbatim}>>> f \end{verbatim} \end{quote} We may retrieve \texttt{m} from \emph{outside} the class via the class dictionary [\hyperlink{id36}{18}]: \begin{quote} \begin{verbatim}>>> C.__dict__['m'] \end{verbatim} \end{quote} However, if we invoke \texttt{m} with the syntax \texttt{C.m}, then it (magically) becomes a (unbound) method: \begin{quote} \begin{verbatim}>>> C.m #here m has become a method! \end{verbatim} \end{quote} But why is it so? How come that in the second syntax the function \texttt{f} is transformed into a (unbound) method? To answer that question, we have to understand how attributes are really invoked in Python, i.e. via attribute descriptors. 
%___________________________________________________________________________ \hypertarget{methods-versus-functions}{} \pdfbookmark[1]{Methods versus functions}{methods-versus-functions} \subsection*{Methods versus functions} First of all, let me point out the differences between methods and functions. Here, \texttt{C.m} does \emph{not} coincide with \texttt{C.{\_}{\_}dict{\_}{\_}['m']} i.e. \texttt{f}, since its object reference is different: \begin{quote} \begin{verbatim}>>> from oopp import prn,attributes >>> prn('C.m') C.m = at 0x81109b4\end{verbatim} \end{quote} The difference is clear since methods and functions have different attributes: \begin{quote} \begin{verbatim}>>> attributes(f).keys() ['func_closure', 'func_dict', 'func_defaults', 'func_name', 'func_code', 'func_doc', 'func_globals']\end{verbatim} \end{quote} whereas \begin{quote} \begin{verbatim}>>> attributes(C.m).keys() ['im_func', 'im_class', 'im_self']\end{verbatim} \end{quote} We discussed a few of the function attributes in the chapter on functions. The instance method attributes are simpler: \texttt{im{\_}self} returns the object to which the method is attached, \begin{quote} \begin{verbatim}>>> print C.m.im_self #unbound method, attached to the class None >>> C().m.im_self #bound method, attached to C() <__main__.C object at 0x81bf4ec> \end{verbatim} \end{quote} \texttt{im{\_}class} returns the class to which the method is attached \begin{quote} \begin{verbatim}>>> C.m.im_class #class of the unbound method >>> C().m.im_class #class of the bound method, \end{verbatim} \end{quote} and \texttt{im{\_}func} returns the function equivalent to the method. \begin{quote} \begin{verbatim}>>> C.m.im_func >>> C().m.im_func # the same \end{verbatim} \end{quote} As the reference manual states, calling \texttt{m(*args,**kw)} is completely equivalent to calling \texttt{m.im{\_}func(m.im{\_}self, *args,**kw)}. 
As a general rule, an attribute descriptor is an object with a \texttt{{\_}{\_}get{\_}{\_}} special method. The most used descriptors are the good old functions: they have a \texttt{{\_}{\_}get{\_}{\_}} special method returning a \emph{method-wrapper object} \begin{quote} \begin{verbatim}>>> f.__get__ \end{verbatim} \end{quote} Method-wrapper objects can be transformed into (both bound and unbound) methods: \begin{quote} \begin{verbatim}>>> f.__get__(None,C) >>> f.__get__(C(),C) >\end{verbatim} \end{quote} The general calling syntax for method-wrapper objects is \texttt{.{\_}{\_}get{\_}{\_}(obj,cls=None)}, where the first argument is an instance object or None and the second (optional) argument is the class (or a generic superclass) of the first one. Now we see what happens when we use the syntax \texttt{C.m}: Python interprets this as a shortcut for \texttt{C.{\_}{\_}dict{\_}{\_}['m'].{\_}{\_}get{\_}{\_}(None,C)} (if \texttt{m} is in the 'C' dictionary, otherwise it looks in the ancestor dictionaries). 
We may check that everything is correct by observing that \texttt{f.{\_}{\_}get{\_}{\_}(None,C)} has exactly the same object reference as \texttt{C.m}, therefore they are the same object: \begin{quote} \begin{verbatim}>>> hex(id(f.__get__(None,C))) # same as hex(id(C.m)) '0x811095c'\end{verbatim} \end{quote} The process works equally well for the syntax \texttt{getattr}: \begin{quote} \begin{verbatim}>>> print getattr(C,'m'), hex(id(getattr(C,'m'))) 0x811095c\end{verbatim} \end{quote} and for bound methods: if \begin{quote} \begin{verbatim}>>> c=C()\end{verbatim} \end{quote} is an instance of the class C, then the syntax \begin{quote} \begin{verbatim}>>> getattr(c,'m') #same as c.m >\end{verbatim} \end{quote} is a shortcut for \begin{quote} \begin{verbatim}>>> type(c).__dict__['m'].__get__(c,C) # or f.__get__(c,C) >\end{verbatim} \end{quote} (notice that the object reference for \texttt{c.m} and \texttt{f.{\_}{\_}get{\_}{\_}(c,C)} is the same, they are \emph{exactly} the same object). Both the unbound method C.m and the bound method c.m refer to the same object at hexadecimal address 0x811095c. This object is common to all other instances of C: \begin{quote} \begin{verbatim}>>> c2=C() >>> print c2.m,hex(id(c2.m)) #always the same method > 0x811095c\end{verbatim} \end{quote} One can also omit the second argument: \begin{quote} \begin{verbatim}>>> c.m.__get__(c) >\end{verbatim} \end{quote} Finally, let me point out that methods are attribute descriptors too, since they have a \texttt{{\_}{\_}get{\_}{\_}} attribute returning a method-wrapper object: \begin{quote} \begin{verbatim}>>> C.m.__get__ \end{verbatim} \end{quote} Notice that this method wrapper is \emph{not} the same as the \texttt{f.{\_}{\_}get{\_}{\_}} method wrapper. \begin{quote} \begin{figure}[b]\hypertarget{id36}[18] If \texttt{C.{\_}{\_}dict{\_}{\_}['m']} is not defined, Python looks if \texttt{m} is defined in some ancestor of C. 
For instance if \titlereference{B} is the base of \titlereference{C}, it looks in \texttt{B.{\_}{\_}dict{\_}{\_}['m']}, etc., by following the MRO. \end{figure} \end{quote} %___________________________________________________________________________ \hypertarget{static-methods-and-class-methods}{} \pdfbookmark[1]{Static methods and class methods}{static-methods-and-class-methods} \subsection*{Static methods and class methods} Whereas functions and methods are implicit attribute descriptors, static methods and class methods are examples of explicit descriptors. They allow one to convert regular functions to specific descriptor objects. Let me show a trivial example. Given the identity function \begin{quote} \begin{verbatim}>>> def f(x): return x\end{verbatim} \end{quote} we may convert it to a staticmethod object \begin{quote} \begin{verbatim}>>> sm=staticmethod(f) >>> sm \end{verbatim} \end{quote} or to a classmethod object \begin{quote} \begin{verbatim}>>> cm=classmethod(f) >>> cm \end{verbatim} \end{quote} In both cases the \texttt{{\_}{\_}get{\_}{\_}} special method returns a method-wrapper object \begin{quote} \begin{verbatim}>>> sm.__get__ >>> cm.__get__ \end{verbatim} \end{quote} However the static method wrapper is quite different from the class method wrapper. In the first case the wrapper returns a function: \begin{quote} \begin{verbatim}>>> sm.__get__(C(),C) >>> sm.__get__(C()) \end{verbatim} \end{quote} in the second case it returns a method \begin{quote} \begin{verbatim}>>> cm.__get__(C(),C) >\end{verbatim} \end{quote} Let me discuss the static methods in more detail, first. It is always possible to extract the function from the static method via the syntaxes \texttt{sm.{\_}{\_}get{\_}{\_}(a)} and \texttt{sm.{\_}{\_}get{\_}{\_}(a,b)} with \emph{ANY} valid a and b, i.e. the result does not depend on a and b. This is correct, since static methods are actually functions that have nothing to do with the class and the instances to which they are bound. 
This behaviour of the method wrapper makes clear why the relation between methods and functions is inverted for static methods with respect to regular methods: \begin{quote} \begin{verbatim}>>> class C(object): ... s=staticmethod(lambda : None) ... print s ... \end{verbatim} \end{quote} Static methods are non-trivial objects \emph{inside} the class, whereas they are regular functions \emph{outside} the class: \begin{quote} \begin{verbatim}>>> C.s at 0x8158e7c> >>> C().s at 0x8158e7c>\end{verbatim} \end{quote} The situation is different for classmethods: inside the class they are non-trivial objects, just as static methods, \begin{quote} \begin{verbatim}>>> class C(object): ... cm=classmethod(lambda cls: None) ... print cm ... \end{verbatim} \end{quote} but outside the class they are methods bound to the class, \begin{quote} \begin{verbatim}>>> c=C() >>> prn('c.cm') of > 0x811095c\end{verbatim} \end{quote} and not to the instance 'c'. The reason is that the \texttt{{\_}{\_}get{\_}{\_}} wrapper method can be invoked with the syntax \texttt{{\_}{\_}get{\_}{\_}(a,cls)} which is only sensitive to the second argument or with the syntax \texttt{{\_}{\_}get{\_}{\_}(obj)} which is only sensitive to the type of the first argument: \begin{quote} \begin{verbatim}>>> cm.__get__('whatever',C) # the first argument is ignored >\end{verbatim} \end{quote} whereas the one-argument form is sensitive to the type of 'whatever': \begin{quote} \begin{verbatim}>>> cm.__get__('whatever') # in Python 2.2 would give a serious error >\end{verbatim} \end{quote} Notice that the class method is actually bound to C's class, i.e. to 'type'. Just like regular methods (and unlike static methods), classmethods have attributes \texttt{im{\_}class}, \texttt{im{\_}func}, and \texttt{im{\_}self}.
In particular one can retrieve the function wrapped inside the classmethod with \begin{quote} \begin{verbatim}>>> cm.__get__('whatever','whatever').im_func \end{verbatim} \end{quote} The difference from regular methods is that \texttt{im{\_}class} returns the class of 'C' whereas \texttt{im{\_}self} returns 'C' itself. \begin{quote} \begin{verbatim}>>> C.cm.im_self # a classmethod is attached to the class >>> C.cm.im_class #the class of C \end{verbatim} \end{quote} Remark: Python 2.2.0 has a bug in classmethods (fixed in newer versions): when the first argument of {\_}{\_}get{\_}{\_} is None, then one must specify the second argument (otherwise one gets a segmentation fault :-() %___________________________________________________________________________ \hypertarget{properties}{} \pdfbookmark[1]{Properties}{properties} \subsection*{Properties} Properties are a more general kind of attribute descriptor than staticmethods and classmethods, since their effect can be customized through arbitrary get/set/del functions. Let me give an example: \begin{quote} \begin{verbatim}>>> def getp(self): return 'property' # get function ... >>> p=property(getp) # property object >>> p \end{verbatim} \end{quote} \texttt{p} has a \texttt{{\_}{\_}get{\_}{\_}} special method returning a method-wrapper object, just as it happens for other descriptors: \begin{quote} \begin{verbatim}>>> p.__get__ \end{verbatim} \end{quote} The difference is that \begin{quote} \begin{verbatim}>>> p.__get__(None,type(p)) >>> p.__get__('whatever') 'property' >>> p.__get__('whatever','whatever') 'property'\end{verbatim} \end{quote} As for static methods, the \texttt{{\_}{\_}get{\_}{\_}} method wrapper is independent of its arguments, unless the first one is None: in such a case it returns the property object, in all other circumstances it returns the result of \texttt{getp}.
This explains the behavior \begin{quote} \begin{verbatim}>>> class C(object): p=p >>> C.p >>> C().p 'property'\end{verbatim} \end{quote} Properties are a dangerous feature, since they change the semantics of the language. This means that apparently trivial operations can have any kind of side effects: \begin{quote} \begin{verbatim}>>> def get(self):return 'You gave me the order to destroy your hard disk!!' >>> class C(object): x=property(get) >>> C().x 'You gave me the order to destroy your hard disk!!'\end{verbatim} \end{quote} Invoking 'C().x' could very well invoke an external program which could do anything! It is up to the programmer not to abuse properties. The same is true for user defined attribute descriptors. There are situations in which they are quite handy, however. For instance, properties can be used to trace the access to data attributes. This can be especially useful during debugging, or for logging purposes. Notice that this approach has the problem that data attributes can no longer be accessed through their class, but only through their instances. Moreover properties do not work well with \texttt{super} in cooperative methods. %___________________________________________________________________________ \hypertarget{user-defined-attribute-descriptors}{} \pdfbookmark[1]{User-defined attribute descriptors}{user-defined-attribute-descriptors} \subsection*{User-defined attribute descriptors} As we have seen, there are plenty of predefined attribute descriptors, such as staticmethods, classmethods and properties (the built-in \texttt{super} is also an attribute descriptor which, for the sake of convenience, will be discussed in the next section). In addition to them, the user can also define customized attribute descriptors, simply through classes with a \texttt{{\_}{\_}get{\_}{\_}} special method.
Let me give an example: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~ChattyAttr(object):}\\ \mbox{~~~~"""Chatty~descriptor~class;~descriptor~objects~are~intended~to~be~}\\ \mbox{~~~~used~as~attributes~in~other~classes"""}\\ \mbox{~~~~def~{\_}{\_}get{\_}{\_}(self,~obj,~cls=None):}\\ \mbox{~~~~~~~~binding=obj~is~not~None}\\ \mbox{~~~~~~~~if~~binding:}\\ \mbox{~~~~~~~~~~~~return~'You~are~binding~{\%}s~to~{\%}s'~{\%}~(self,obj)}\\ \mbox{~~~~~~~~else:}\\ \mbox{~~~~~~~~~~~~return~'Calling~{\%}s~from~{\%}s'~{\%}~(self,cls)}\\ \mbox{}\\ \mbox{class~C(object):}\\ \mbox{~~~~d=ChattyAttr()}\\ \mbox{}\\ \mbox{c=C()}\\ \mbox{}\\ \mbox{print~c.d~{\#}~<=>~type(c).{\_}{\_}dict{\_}{\_}['d'].{\_}{\_}get{\_}{\_}(c,type(c))}\\ \mbox{print~C.d~{\#}~<=>~C.{\_}{\_}dict{\_}{\_}['d'].{\_}{\_}get{\_}{\_}(None,C)}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} with output: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{You~are~binding~~to~}\\ \mbox{}\\ \mbox{Calling~~from~} \end{flushleft}\end{ttfamily} \end{quote} Accessing a descriptor attribute with the syntax \texttt{C.d} or \texttt{c.d} involves calling \texttt{{\_}{\_}get{\_}{\_}}. The \texttt{{\_}{\_}get{\_}{\_}} signature is fixed: it is \texttt{{\_}{\_}get{\_}{\_}(self,obj,cls=None)}, since the notation \texttt{self.descr{\_}attr} automatically passes \texttt{self} and \texttt{self.{\_}{\_}class{\_}{\_}} to \texttt{{\_}{\_}get{\_}{\_}}. Custom descriptors can be used to restrict the access to objects in a more general way than through properties. For instance, suppose one wants to raise an error if a given attribute 'a' is accessed, both from the class and from the instance: a property cannot help here, since it works only from the instance.
The solution is the following custom descriptor: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~AccessError(object):}\\ \mbox{~~~~"""Descriptor~raising~an~AttributeError~when~the~attribute~is~}\\ \mbox{~~~~accessed"""~{\#}could~be~done~with~a~property}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(self,errormessage):}\\ \mbox{~~~~~~~~self.msg=errormessage}\\ \mbox{~~~~def~{\_}{\_}get{\_}{\_}(self,obj,cls=None):}\\ \mbox{~~~~~~~~raise~AttributeError(self.msg)}\\ \mbox{}\\ \mbox{{\#}}\\ \mbox{}\\ \mbox{>>>~from~oopp~import~AccessError}\\ \mbox{>>>~class~C(object):}\\ \mbox{...~~~~a=AccessError("'a'~cannot~be~accessed")}\\ \mbox{>>>~c=C()}\\ \mbox{>>>~c.a~{\#}error}\\ \mbox{Traceback~(most~recent~call~last):}\\ \mbox{~~File~"",~line~1,~in~?}\\ \mbox{~~File~"oopp.py",~line~313,~in~{\_}{\_}get{\_}{\_}}\\ \mbox{~~~~raise~AttributeError(self.msg)}\\ \mbox{AttributeError:~'a'~cannot~be~accessed}\\ \mbox{>>>~C.a~{\#}error}\\ \mbox{Traceback~(most~recent~call~last):}\\ \mbox{~~File~"",~line~1,~in~?}\\ \mbox{~~File~"oopp.py",~line~313,~in~{\_}{\_}get{\_}{\_}}\\ \mbox{~~~~raise~AttributeError(self.msg)}\\ \mbox{AttributeError:~'a'~cannot~be~accessed} \end{flushleft}\end{ttfamily} \end{quote} It is always possible to convert plain attributes (i.e.
attributes without a ``{\_}{\_}get{\_}{\_}'' method) to descriptor objects: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~convert2descriptor(object):}\\ \mbox{~~~~"""To~all~practical~means,~this~class~acts~as~a~function~that,~given~an}\\ \mbox{~~~~object,~adds~to~it~a~{\_}{\_}get{\_}{\_}~method~if~it~is~not~already~there.~The~}\\ \mbox{~~~~added~{\_}{\_}get{\_}{\_}~method~is~trivial~and~simply~returns~the~original~object,~}\\ \mbox{~~~~independently~from~obj~and~cls."""}\\ \mbox{~~~~def~{\_}{\_}new{\_}{\_}(cls,a):}\\ \mbox{~~~~~~~~if~hasattr(a,"{\_}{\_}get{\_}{\_}"):~{\#}~do~nothing}\\ \mbox{~~~~~~~~~~~~return~a~{\#}~a~is~already~a~descriptor}\\ \mbox{~~~~~~~~else:~{\#}~creates~a~trivial~attribute~descriptor}\\ \mbox{~~~~~~~~~~~~cls.a=a}\\ \mbox{~~~~~~~~~~~~return~object.{\_}{\_}new{\_}{\_}(cls)}\\ \mbox{~~~~def~{\_}{\_}get{\_}{\_}(self,obj,cls=None):}\\ \mbox{~~~~~~~~"Returns~self.a~independently~from~obj~and~cls"}\\ \mbox{~~~~~~~~return~self.a}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} This example also shows the magic of \texttt{{\_}{\_}new{\_}{\_}}, which allows one to use a class as a function. The output of 'convert2descriptor(a)' can be either an instance of 'convert2descriptor' (in this case 'convert2descriptor' acts as a normal class, i.e. as an object factory) or 'a' itself (if 'a' is already a descriptor): in this case 'convert2descriptor' acts as a function.
For instance, a string is converted to a descriptor \begin{quote} \begin{verbatim}>>> from oopp import convert2descriptor >>> a2=convert2descriptor('a') >>> a2 >>> a2.__get__('whatever') 'a'\end{verbatim} \end{quote} whereas a function is untouched: \begin{quote} \begin{verbatim}>>> def f(): pass >>> f2=convert2descriptor(f) # does nothing >>> f2 \end{verbatim} \end{quote} %___________________________________________________________________________ \hypertarget{data-descriptors}{} \pdfbookmark[1]{Data descriptors}{data-descriptors} \subsection*{Data descriptors} It is also possible to specify a \texttt{{\_}{\_}set{\_}{\_}} method (descriptors with a \texttt{{\_}{\_}set{\_}{\_}} method are typically data descriptors) with the signature \texttt{{\_}{\_}set{\_}{\_}(self,obj,value)} as in the following example: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~DataDescriptor(object):}\\ \mbox{~~~~value=None}\\ \mbox{~~~~def~{\_}{\_}get{\_}{\_}(self,~obj,~cls=None):}\\ \mbox{~~~~~~~~if~obj~is~None:~obj=cls}\\ \mbox{~~~~~~~~print~"Getting",obj,"value~=",self.value}\\ \mbox{~~~~~~~~return~self.value}\\ \mbox{~~~~def~{\_}{\_}set{\_}{\_}(self,~obj,~value):}\\ \mbox{~~~~~~~~self.value=value}\\ \mbox{~~~~~~~~print~"Setting",obj,"value~=",value}\\ \mbox{}\\ \mbox{class~C(object):}\\ \mbox{~~~~d=DataDescriptor()}\\ \mbox{}\\ \mbox{c=C()}\\ \mbox{}\\ \mbox{c.d=1~{\#}calls~C.{\_}{\_}dict{\_}{\_}['d'].{\_}{\_}set{\_}{\_}(c,1)}\\ \mbox{c.d~~~{\#}calls~C.{\_}{\_}dict{\_}{\_}['d'].{\_}{\_}get{\_}{\_}(c,C)}\\ \mbox{C.d~~~{\#}calls~C.{\_}{\_}dict{\_}{\_}['d'].{\_}{\_}get{\_}{\_}(None,C)}\\ \mbox{C.d=0~{\#}does~*not*~call~{\_}{\_}set{\_}{\_}}\\ \mbox{print~"C.d~=",C.d}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} With output: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{Setting~~value~=~1}\\ \mbox{Getting~~value~=~1}\\ \mbox{Getting~~value~=~1~~~~~~}\\ \mbox{C.d~=~0} \end{flushleft}\end{ttfamily} \end{quote} With this 
knowledge, we may now reconsider the clock example given in chapter 3. {\#}NO!?? \begin{quote} \begin{verbatim}>>> import oopp >>> class Clock(object): pass >>> myclock=Clock() ... >>> myclock.get_time=oopp.get_time # this is a function >>> Clock.get_time=lambda self : oopp.get_time() # this is a method \end{verbatim} \end{quote} In this example, \texttt{myclock.get{\_}time}, which is attached to the \texttt{myclock} object, is a function, whereas \texttt{Clock.get{\_}time}, which is attached to the \texttt{Clock} class is a method. We may also check this by using the \texttt{type} function: \begin{quote} \begin{verbatim}>>> type(myclock.get_time) \end{verbatim} \end{quote} whereas \begin{quote} \begin{verbatim}>>> type(Clock.get_time) \end{verbatim} \end{quote} It must be remarked that user-defined attribute descriptors, just as properties, allow to arbitrarily change the semantics of the language and should be used with care. %___________________________________________________________________________ \hypertarget{the-super-attribute-descriptor}{} \pdfbookmark[1]{The super attribute descriptor}{the-super-attribute-descriptor} \subsection*{The \texttt{super} attribute descriptor} super has also a second form, where it is more used as a descriptor. \texttt{super} objects are attribute descriptors, too, with a \texttt{{\_}{\_}get{\_}{\_}} method returning a method-wrapper object: \begin{quote} \begin{verbatim}>>> super(C,C()).__get__ \end{verbatim} \end{quote} Here I give some example of acceptable call: \begin{quote} \begin{verbatim}>>> super(C,C()).__get__('whatever') , > >>> super(C,C()).__get__('whatever','whatever') , >\end{verbatim} \end{quote} Unfortunately, for the time being (i.e. for Python 2.3), the \texttt{super} mechanism has various limitations. 
To show the issues, let me start by considering the following base class: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~ExampleBaseClass(PrettyPrinted):}\\ \mbox{~~~~"""Contains~a~regular~method~'m',~a~staticmethod~'s',~a~classmethod~}\\ \mbox{~~~~'c',~a~property~'p'~and~a~data~attribute~'d'."""}\\ \mbox{~~~~m=lambda~self:~'regular~method~of~{\%}s'~{\%}~self}\\ \mbox{~~~~s=staticmethod(lambda~:~'staticmethod')}\\ \mbox{~~~~c=classmethod(lambda~cls:~'classmethod~of~{\%}s'~{\%}~cls)}\\ \mbox{~~~~p=property(lambda~self:~'property~of~{\%}s'~{\%}~self)}\\ \mbox{~~~~a=AccessError('Expected~error')}\\ \mbox{~~~~d='data'}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Now, let me derive a new class C from ExampleBaseClass: \begin{quote} \begin{verbatim}>>> from oopp import ExampleBaseClass >>> class C(ExampleBaseClass): pass >>> c=C()\end{verbatim} \end{quote} Ideally, we would like to retrieve the methods and attributes of ExampleBaseClass from C, by using the \texttt{super} mechanism. \newcounter{listcnt24} \begin{list}{\arabic{listcnt24}.} { \usecounter{listcnt24} \setlength{\rightmargin}{\leftmargin} } \item {} We see that \texttt{super} works without problems for regular methods, staticmethods and classmethods: \end{list} \begin{quote} \begin{verbatim}>>> super(C,c).m() 'regular method of ' >>> super(C,c).s() 'staticmethod' >>> super(C,c).c() "classmethod of "\end{verbatim} \end{quote} It also works for user defined attribute descriptors: \begin{quote} \begin{verbatim}>>> super(C,c).a # access error Traceback (most recent call last): File "", line 1, in ? 
File "oopp.py", line 340, in __get__ raise AttributeError(self.msg) AttributeError: Expected error\end{verbatim} \end{quote} and for properties (only for Python 2.3+): \begin{quote} \begin{verbatim}>>> ExampleBaseClass.p \end{verbatim} \end{quote} In Python 2.2 one would get an error, instead \begin{quote} \begin{verbatim}>>> super(C,c).p #error Traceback (most recent call last): File "", line 1, in ? AttributeError: 'super' object has no attribute 'p'\end{verbatim} \end{quote} 3. Moreover, certain attributes of the superclass, such as its \texttt{{\_}{\_}name{\_}{\_}}, cannot be retrieved: \begin{quote} \begin{verbatim}>>> ExampleBaseClass.__name__ 'ExampleBaseClass' >>> super(C,c).__name__ #error Traceback (most recent call last): File "", line 1, in ? AttributeError: 'super' object has no attribute '__name__'\end{verbatim} \end{quote} \newcounter{listcnt25} \begin{list}{\arabic{listcnt25}.} { \usecounter{listcnt25} \addtocounter{listcnt25}{3} \setlength{\rightmargin}{\leftmargin} } \item {} There is no direct way to retrieve the methods of the super-superclass (i.e. the grandmother class, if you wish) or in general the furthest ancestors, since \texttt{super} does not chain. \item {} Finally, there are some subtle issues with the \texttt{super(cls)} syntax: \end{list} \begin{quote} \begin{verbatim}>>> super(C).m #(2) error Traceback (most recent call last): File "", line 1, in ? AttributeError: 'super' object has no attribute 'm'\end{verbatim} \end{quote} means \texttt{super(C).{\_}{\_}get{\_}{\_}(None,C)}, but only \texttt{super(C).{\_}{\_}get{\_}{\_}(c,C).m==super(C,c)} works. \begin{quote} \begin{quote} On the other hand, \end{quote} \begin{verbatim}>>> super(C).__init__ #(1) >>> super(C).__new__ #(1) \end{verbatim} \begin{quote} seems to work, whereas in reality does not. 
The reason is that since \texttt{super} objects are instances of \texttt{object}, they inherit object's methods, and in particular \texttt{{\_}{\_}init{\_}{\_}} ; therefore the \texttt{{\_}{\_}init{\_}{\_}} method in (1) is \emph{not} the \texttt{ExampleBaseClass.{\_}{\_}init{\_}{\_}} method. The point is that \texttt{super} objects are attribute descriptors and not references to the superclass. \end{quote} \end{quote} Probably, in future versions of Python the \texttt{super} mechanism will be improved. However, for the time being, one must provide a workaround for dealing with these issues. This will be discussed in the next chapter. %___________________________________________________________________________ \hypertarget{method-wrappers}{} \pdfbookmark[1]{Method wrappers}{method-wrappers} \subsection*{Method wrappers} One of the most typical applications of attribute descriptors is their usage as \emph{method wrappers}. Suppose, for instance, one wants to add tracing capabilities to the methods of a class for debugging purposes. 
The problem can be solved with a custom descriptor class: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{import~inspect}\\ \mbox{}\\ \mbox{class~wrappedmethod(Customizable):}\\ \mbox{~~~~"""Customizable~method~factory~intended~for~derivation.}\\ \mbox{~~~~The~wrapper~method~is~overridden~in~the~children."""}\\ \mbox{}\\ \mbox{~~~~logfile=sys.stdout~{\#}~default}\\ \mbox{~~~~namespace=''~{\#}~default}\\ \mbox{}\\ \mbox{~~~~def~{\_}{\_}new{\_}{\_}(cls,meth):~{\#}~meth~is~a~descriptor}\\ \mbox{~~~~~~~~if~isinstance(meth,FunctionType):}\\ \mbox{~~~~~~~~~~~~kind=0~{\#}~regular~method}\\ \mbox{~~~~~~~~~~~~func=meth}\\ \mbox{~~~~~~~~elif~isinstance(meth,staticmethod):}\\ \mbox{~~~~~~~~~~~~kind=1~{\#}~static~method}\\ \mbox{~~~~~~~~~~~~func=meth.{\_}{\_}get{\_}{\_}('whatever')}\\ \mbox{~~~~~~~~elif~isinstance(meth,classmethod):}\\ \mbox{~~~~~~~~~~~~kind=2~{\#}~class~method}\\ \mbox{~~~~~~~~~~~~func=meth.{\_}{\_}get{\_}{\_}('whatever','whatever').im{\_}func~}\\ \mbox{~~~~~~~~elif~isinstance(meth,wrappedmethod):~{\#}~already~wrapped}\\ \mbox{~~~~~~~~~~~~return~meth~{\#}~do~nothing}\\ \mbox{~~~~~~~~elif~inspect.ismethoddescriptor(meth):}\\ \mbox{~~~~~~~~~~~~kind=0;~func=meth~{\#}~for~many~builtin~methods~}\\ \mbox{~~~~~~~~else:}\\ \mbox{~~~~~~~~~~~~return~meth~{\#}~do~nothing}\\ \mbox{~~~~~~~~self=super(wrappedmethod,cls).{\_}{\_}new{\_}{\_}(cls)}\\ \mbox{~~~~~~~~self.kind=kind;~self.func=func~{\#}~pre-initialize}\\ \mbox{~~~~~~~~return~self}\\ \mbox{}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(self,meth):~{\#}~meth~not~used}\\ \mbox{~~~~~~~~self.logfile=self.logfile~{\#}~default~values}\\ \mbox{~~~~~~~~self.namespace=self.namespace~{\#}~copy~the~current}\\ \mbox{}\\ \mbox{~~~~def~{\_}{\_}get{\_}{\_}(self,obj,cls):~{\#}~closure~}\\ \mbox{~~~~~~~~def~{\_}(*args,**kw):}\\ \mbox{~~~~~~~~~~~~if~obj~is~None:~o=()~{\#}~unbound~method~call}\\ \mbox{~~~~~~~~~~~~else:~o=(obj,)~{\#}~bound~method~call}\\ 
\mbox{~~~~~~~~~~~~allargs=[o,(),(cls,)][self.kind]+args~}\\ \mbox{~~~~~~~~~~~~return~self.wrapper()(*allargs,**kw)}\\ \mbox{~~~~~~~~return~{\_}~{\#}~the~wrapped~function}\\ \mbox{~~~~~~~~{\#}~allargs~is~the~only~nontrivial~line~in~{\_};~it~adds}\\ \mbox{~~~~~~~~{\#}~0~-~obj~if~meth~is~a~regular~method}\\ \mbox{~~~~~~~~{\#}~1~-~nothing~if~meth~is~a~static~method}\\ \mbox{~~~~~~~~{\#}~2~-~cls~if~meth~is~a~class~method}\\ \mbox{}\\ \mbox{~~~~def~wrapper(self):~return~self.func~{\#}~do~nothing,~to~be~overridden}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} This class is intended for derivation: the wrapper method has to be overridden in the children in order to introduce the wanted feature. If I want to implement the capability of tracing methods, I can reuse the \texttt{with{\_}tracer} closure introduced in chapter 2: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~tracedmethod(wrappedmethod):}\\ \mbox{~~~~def~wrapper(self):}\\ \mbox{~~~~~~~~return~with{\_}tracer(self.func,self.namespace,self.logfile)}\\ \mbox{~~~~~~~~}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Nothing prevents me from introducing timing features by reusing the \texttt{with{\_}timer} closure: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~timedmethod(wrappedmethod):}\\ \mbox{~~~~iterations=1~{\#}~additional~default~parameter}\\ \mbox{}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(self,meth):}\\ \mbox{~~~~~~~~super(timedmethod,self).{\_}{\_}init{\_}{\_}(self,meth)}\\ \mbox{~~~~~~~~self.iterations=self.iterations~{\#}~copy}\\ \mbox{}\\ \mbox{~~~~def~wrapper(self):}\\ \mbox{~~~~~~~~return~with{\_}timer(self.func,self.namespace,}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~self.iterations,self.logfile)}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Here there is an example of usage: The dictionary of wrapped functions is then built from an utility function \begin{quote} 
\begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~wrap(obj,wrapped,condition=lambda~k,v:~True,~err=None):}\\ \mbox{~~~~"Retrieves~obj's~dictionary~and~wraps~it"}\\ \mbox{~~~~if~isinstance(obj,dict):~{\#}~obj~is~a~dictionary~}\\ \mbox{~~~~~~~~dic=obj}\\ \mbox{~~~~else:~}\\ \mbox{~~~~~~~~dic=getattr(obj,'{\_}{\_}dict{\_}{\_}',{\{}{\}}).copy()~{\#}~avoids~dictproxy~objects}\\ \mbox{~~~~~~~~if~not~dic:~dic=attributes(obj)~{\#}~for~simple~objects}\\ \mbox{~~~~wrapped.namespace=getattr(obj,'{\_}{\_}name{\_}{\_}','')}\\ \mbox{~~~~for~name,attr~in~dic.iteritems():~{\#}~modify~dic}\\ \mbox{~~~~~~~~if~condition(name,attr):~dic[name]=wrapped(attr)}\\ \mbox{~~~~if~not~isinstance(obj,dict):~{\#}~modify~obj}\\ \mbox{~~~~~~~~customize(obj,err,**dic)~}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{from~oopp~import~*}\\ \mbox{}\\ \mbox{class~C(object):~}\\ \mbox{~~~~"Class~with~traced~methods"}\\ \mbox{}\\ \mbox{~~~~def~f(self):~return~self~}\\ \mbox{~~~~f=tracedmethod(f)}\\ \mbox{}\\ \mbox{~~~~g=staticmethod(lambda:None)}\\ \mbox{~~~~g=tracedmethod(g)}\\ \mbox{}\\ \mbox{~~~~h=classmethod(do{\_}nothing)}\\ \mbox{~~~~h=tracedmethod(h)}\\ \mbox{}\\ \mbox{c=C()}\\ \mbox{}\\ \mbox{{\#}unbound~calls}\\ \mbox{C.f(c)~}\\ \mbox{C.g()}\\ \mbox{C.h()}\\ \mbox{}\\ \mbox{{\#}bound~calls}\\ \mbox{c.f()~~}\\ \mbox{c.g()}\\ \mbox{c.h()}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Output: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{[C]~Calling~'f'~with~arguments}\\ \mbox{(,){\{}{\}}~...}\\ \mbox{->~'C.f'~called~with~result:~}\\ \mbox{}\\ \mbox{[C]~Calling~''~with~arguments}\\ \mbox{(){\{}{\}}~...}\\ \mbox{->~'C.'~called~with~result:~None}\\ \mbox{}\\ \mbox{[C]~Calling~'do{\_}nothing'~with~arguments}\\ \mbox{(,){\{}{\}}~...}\\ \mbox{->~'C.do{\_}nothing'~called~with~result:~None}\\ \mbox{}\\ \mbox{[C]~Calling~'f'~with~arguments}\\ \mbox{(,){\{}{\}}~...}\\ 
\mbox{->~'C.f'~called~with~result:~}\\ \mbox{}\\ \mbox{[C]~Calling~''~with~arguments}\\ \mbox{(){\{}{\}}~...}\\ \mbox{->~'C.'~called~with~result:~None}\\ \mbox{}\\ \mbox{[C]~Calling~'do{\_}nothing'~with~arguments}\\ \mbox{(,){\{}{\}}~...}\\ \mbox{->~'C.do{\_}nothing'~called~with~result:~None} \end{flushleft}\end{ttfamily} \end{quote} The approach in 'tracingmethods.py' works, but it is far from being elegant, since I had to explicitly wrap each method in the class by hand. This problem can be avoided with the \texttt{wrap} utility: \begin{quote} \begin{verbatim}>>> from oopp import * >>> wrap(Clock,tracedmethod) >>> Clock.get_time() [Clock] Calling 'get_time' with arguments (){} ... -> 'Clock.get_time' called with result: 21:56:52 '21:56:52'\end{verbatim} \end{quote} %___________________________________________________________________________ \hypertarget{the-subtleties-of-multiple-inheritance}{} \pdfbookmark[0]{THE SUBTLETIES OF MULTIPLE INHERITANCE}{the-subtleties-of-multiple-inheritance} \section*{THE SUBTLETIES OF MULTIPLE INHERITANCE} In chapter 4 we introduced the concept of multiple inheritance and discussed its simplest applications in the absence of name collisions. When methods with different names are derived from different classes, multiple inheritance is pretty trivial. However, all kinds of subtleties arise in the presence of name clashes, i.e. when we multiply inherit different methods defined in different classes but with the \emph{same} name. In order to understand what happens in this situation, it is essential to understand the concept of Method Resolution Order (MRO). For the reader's convenience, I collect in this chapter some of the information reported in \href{http://www.python.org/2.3/mro.html}{http://www.python.org/2.3/mro.html}.
%___________________________________________________________________________ \hypertarget{a-little-bit-of-history-why-python-2-3-has-changed-the-mro}{} \pdfbookmark[1]{A little bit of history: why Python 2.3 has changed the MRO}{a-little-bit-of-history-why-python-2-3-has-changed-the-mro} \subsection*{A little bit of history: why Python 2.3 has changed the MRO} Everything started with a post by Samuele Pedroni to the Python development mailing list [\hyperlink{id36}{18}]. In his post, Samuele showed that the Python 2.2 method resolution order is not monotonic and he proposed to replace it with the C3 method resolution order. Guido agreed with his arguments and therefore now Python 2.3 uses C3. The C3 method itself has nothing to do with Python, since it was invented by people working on Dylan and it is described in a paper intended for lispers [\hyperlink{id40}{19}]. The present paper gives a (hopefully) readable discussion of the C3 algorithm for Pythonistas who want to understand the reasons for the change. First of all, let me point out that what I am going to say only applies to the \emph{new style classes} introduced in Python 2.2: \emph{classic classes} maintain their old method resolution order, depth first and then left to right. Therefore, there is no breaking of old code for classic classes; and even if in principle there could be breaking of code for Python 2.2 new style classes, in practice the cases in which the C3 resolution order differs from the Python 2.2 method resolution order are so rare that no real breaking of code is expected. Therefore: don't be scared! Moreover, unless you make strong use of multiple inheritance and you have non-trivial hierarchies, you don't need to understand the C3 algorithm, and you can easily skip this paper. On the other hand, if you really want to know how multiple inheritance works, then this paper is for you. The good news is that things are not as complicated as you might expect. 
Let me begin with some basic definitions. \newcounter{listcnt26} \begin{list}{\arabic{listcnt26})} { \usecounter{listcnt26} \setlength{\rightmargin}{\leftmargin} } \item {} Given a class C in a complicated multiple inheritance hierarchy, it is a non-trivial task to specify the order in which methods are overridden, i.e. to specify the order of the ancestors of C. \item {} The list of the ancestors of a class C, including the class itself, ordered from the nearest ancestor to the furthest, is called the class precedence list or the \emph{linearization} of C. \item {} The \emph{Method Resolution Order} (MRO) is the set of rules that construct the linearization. In the Python literature, the idiom ``the MRO of C'' is also used as a synonym for the linearization of the class C. \item {} For instance, in the case of a single inheritance hierarchy, if C is a subclass of C1, and C1 is a subclass of C2, then the linearization of C is simply the list [C, C1, C2]. However, with multiple inheritance hierarchies, it is more difficult to construct a linearization that respects \emph{local precedence ordering} and \emph{monotonicity}. \item {} I will discuss the local precedence ordering later, but I can give the definition of monotonicity here. An MRO is monotonic when the following is true: \emph{if C1 precedes C2 in the linearization of C, then C1 precedes C2 in the linearization of any subclass of C}. Otherwise, the innocuous operation of deriving a new class could change the resolution order of methods, potentially introducing very subtle bugs. Examples where this happens will be shown later. \item {} Not all classes admit a linearization. There are cases, in complicated hierarchies, where it is not possible to derive a class such that its linearization respects all the desired properties. \end{list} Here I give an example of this situation.
Consider the hierarchy \begin{quote} \begin{verbatim}>>> O = object >>> class X(O): pass >>> class Y(O): pass >>> class A(X,Y): pass >>> class B(Y,X): pass\end{verbatim} \end{quote} which can be represented with the following inheritance graph, where I have denoted with O the \texttt{object} class, which is the beginning of any hierarchy for new style classes: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{~-----------}\\ \mbox{|~~~~~~~~~~~|}\\ \mbox{|~~~~O~~~~~~|}\\ \mbox{|~~/~~~{\textbackslash}~~~~|}\\ \mbox{~-~X~~~~Y~~/}\\ \mbox{~~~|~~/~|~/}\\ \mbox{~~~|~/~~|/}\\ \mbox{~~~A~~~~B}\\ \mbox{~~~{\textbackslash}~~~/}\\ \mbox{~~~~~?} \end{flushleft}\end{ttfamily} \end{quote} In this case, it is not possible to derive a new class C from A and B, since X precedes Y in A, but Y precedes X in B, therefore the method resolution order would be ambiguous in C. Python 2.3 raises an exception in this situation (TypeError: MRO conflict among bases Y, X) forbidding the naive programmer from creating ambiguous hierarchies. Python 2.2 instead does not raise an exception, but chooses an \emph{ad hoc} ordering (CABXYO in this case). %___________________________________________________________________________ \hypertarget{the-c3-method-resolution-order}{} \pdfbookmark[1]{The C3 Method Resolution Order}{the-c3-method-resolution-order} \subsection*{The C3 Method Resolution Order} Let me introduce a few simple notations which will be useful for the following discussion. I will use the shortcut notation \begin{quote} C1 C2 ... CN \end{quote} to indicate the list of classes [C1, C2, ... , CN]. The \emph{head} of the list is its first element: \begin{quote} head = C1 \end{quote} whereas the \emph{tail} is the rest of the list: \begin{quote} tail = C2 ... CN. \end{quote} I shall also use the notation \begin{quote} C + (C1 C2 ... CN) = C C1 C2 ... CN \end{quote} to denote the sum of the lists [C] + [C1, C2, ... ,CN]. Now I can explain how the MRO works in Python 2.3. 
Consider a class C in a multiple inheritance hierarchy, with C inheriting from the base classes B1, B2, ... , BN. We want to compute the linearization L[C] of the class C. In order to do that, we need the concept of \emph{merging} lists, since the rule says that \begin{quote} \emph{the linearization of C is the sum of C plus the merge of a) the linearizations of the parents and b) the list of the parents.} \end{quote} In symbolic notation: \begin{quote} L[C(B1 ... BN)] = C + merge(L[B1] ... L[BN], B1 ... BN) \end{quote} How is the merge computed? The rule is the following: \begin{quote} \emph{take the head of the first list, i.e. L[B1][0]; if this head is not in the tail of any of the other lists (such a head is called a good head), then add it to the linearization of C and remove it from the lists in the merge, otherwise look at the head of the next list and take it, if it is a good head. Then repeat the operation until all the classes are removed or it is impossible to find good heads. In the latter case, it is impossible to construct the merge: Python 2.3 will refuse to create the class C and will raise an exception.} \end{quote} This prescription ensures that the merge operation \emph{preserves} the ordering, if the ordering can be preserved. On the other hand, if the order cannot be preserved (as in the example of serious order disagreement discussed above) then the merge cannot be computed. The computation of the merge is trivial if: \newcounter{listcnt27} \begin{list}{\arabic{listcnt27}.} { \usecounter{listcnt27} \setlength{\rightmargin}{\leftmargin} } \item {} C is the \texttt{object} class, which has no parents; in this case its linearization coincides with itself, \begin{quote} L[object] = object.
\end{quote} \item {} C has only one parent (single inheritance); in this case \begin{quote} L[C(B)] = C + merge(L[B],B) = C + L[B] \end{quote} \end{list} However, in the case of multiple inheritance things are more cumbersome and I don't expect you can understand the rule without a couple of examples ;-) %___________________________________________________________________________ \hypertarget{examples}{} \pdfbookmark[1]{Examples}{examples} \subsection*{Examples} First example. Consider the following hierarchy: \begin{quote} \begin{verbatim}>>> O = object >>> class F(O): pass >>> class E(O): pass >>> class D(O): pass >>> class C(D,F): pass >>> class B(D,E): pass >>> class A(B,C): pass\end{verbatim} \end{quote} In this case the inheritance graph can be drawn as \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~6}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~---}\\ \mbox{Level~3~~~~~~~~~~~~~~~~~|~O~|~~~~~~~~~~~~~~~~~~(more~general)}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~/~~---~~{\textbackslash}}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~/~~~~|~~~~{\textbackslash}~~~~~~~~~~~~~~~~~~~~~~|}\\ \mbox{~~~~~~~~~~~~~~~~~~~~/~~~~~|~~~~~{\textbackslash}~~~~~~~~~~~~~~~~~~~~~|}\\ \mbox{~~~~~~~~~~~~~~~~~~~/~~~~~~|~~~~~~{\textbackslash}~~~~~~~~~~~~~~~~~~~~|}\\ \mbox{~~~~~~~~~~~~~~~~~~---~~~~---~~~~---~~~~~~~~~~~~~~~~~~~|}\\ \mbox{Level~2~~~~~~~~3~|~D~|~4|~E~|~~|~F~|~5~~~~~~~~~~~~~~~~|}\\ \mbox{~~~~~~~~~~~~~~~~~~---~~~~---~~~~---~~~~~~~~~~~~~~~~~~~|}\\ \mbox{~~~~~~~~~~~~~~~~~~~{\textbackslash}~~{\textbackslash}~{\_}~/~~~~~~~|~~~~~~~~~~~~~~~~~~~|}\\ \mbox{~~~~~~~~~~~~~~~~~~~~{\textbackslash}~~~~/~{\textbackslash}~{\_}~~~~|~~~~~~~~~~~~~~~~~~~|}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~{\textbackslash}~~/~~~~~~{\textbackslash}~~|~~~~~~~~~~~~~~~~~~~|}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~---~~~~~~---~~~~~~~~~~~~~~~~~~~~|}\\ \mbox{Level~1~~~~~~~~~~~~1~|~B~|~~~~|~C~|~2~~~~~~~~~~~~~~~~~|}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~---~~~~~~---~~~~~~~~~~~~~~~~~~~~|}\\ 
\mbox{~~~~~~~~~~~~~~~~~~~~~~~~{\textbackslash}~~~~~~/~~~~~~~~~~~~~~~~~~~~~~|}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~{\textbackslash}~~~~/~~~~~~~~~~~~~~~~~~~~~~{\textbackslash}~/}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~~---}\\ \mbox{Level~0~~~~~~~~~~~~~~~~~0~|~A~|~~~~~~~~~~~~~~~~(more~specialized)}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~~---} \end{flushleft}\end{ttfamily} \end{quote} The linearizations of O,D,E and F are trivial: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{L[O]~=~O}\\ \mbox{L[D]~=~D~O}\\ \mbox{L[E]~=~E~O}\\ \mbox{L[F]~=~F~O} \end{flushleft}\end{ttfamily} \end{quote} The linearization of B can be computed as \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{L[B]~=~B~+~merge(DO,~EO,~DE)} \end{flushleft}\end{ttfamily} \end{quote} We see that D is a good head, therefore we take it and we are reduced to compute merge(O,EO,E). Now O is not a good head, since it is in the tail of the sequence EO. In this case the rule says that we have to skip to the next sequence. Then we see that E is a good head; we take it and we are reduced to compute merge(O,O) which gives O. 
Therefore \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{L[B]~=~~B~D~E~O} \end{flushleft}\end{ttfamily} \end{quote} Using the same procedure one finds: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{L[C]~=~C~+~merge(DO,FO,DF)}\\ \mbox{~~~~~=~C~+~D~+~merge(O,FO,F)}\\ \mbox{~~~~~=~C~+~D~+~F~+~merge(O,O)}\\ \mbox{~~~~~=~C~D~F~O} \end{flushleft}\end{ttfamily} \end{quote} Now we can compute: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{L[A]~=~A~+~merge(BDEO,CDFO,BC)}\\ \mbox{~~~~~=~A~+~B~+~merge(DEO,CDFO,C)}\\ \mbox{~~~~~=~A~+~B~+~C~+~merge(DEO,DFO)}\\ \mbox{~~~~~=~A~+~B~+~C~+~D~+~merge(EO,FO)}\\ \mbox{~~~~~=~A~+~B~+~C~+~D~+~E~+~merge(O,FO)}\\ \mbox{~~~~~=~A~+~B~+~C~+~D~+~E~+~F~+~merge(O,O)}\\ \mbox{~~~~~=~A~B~C~D~E~F~O} \end{flushleft}\end{ttfamily} \end{quote} In this example, the linearization is ordered in a pretty nice way according to the inheritance level, in the sense that lower levels (i.e. more specialized classes) have higher precedence (see the inheritance graph). However, this is not the general case. 
I leave as an exercise for the reader to compute the linearization for my second example: \begin{quote} \begin{verbatim}>>> O = object >>> class F(O): pass >>> class E(O): pass >>> class D(O): pass >>> class C(D,F): pass >>> class B(E,D): pass >>> class A(B,C): pass\end{verbatim} \end{quote} The only difference with the previous example is the change B(D,E) --{\textgreater} B(E,D); however even such a little modification completely changes the ordering of the hierarchy \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~~6}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~---}\\ \mbox{Level~3~~~~~~~~~~~~~~~~~~|~O~|}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~/~~---~~{\textbackslash}}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~/~~~~|~~~~{\textbackslash}}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~/~~~~~|~~~~~{\textbackslash}}\\ \mbox{~~~~~~~~~~~~~~~~~~~~/~~~~~~|~~~~~~{\textbackslash}}\\ \mbox{~~~~~~~~~~~~~~~~~~---~~~~~---~~~~---}\\ \mbox{Level~2~~~~~~~~2~|~E~|~4~|~D~|~~|~F~|~5}\\ \mbox{~~~~~~~~~~~~~~~~~~---~~~~~---~~~~---}\\ \mbox{~~~~~~~~~~~~~~~~~~~{\textbackslash}~~~~~~/~{\textbackslash}~~~~~/}\\ \mbox{~~~~~~~~~~~~~~~~~~~~{\textbackslash}~~~~/~~~{\textbackslash}~~~/}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~{\textbackslash}~~/~~~~~{\textbackslash}~/}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~---~~~~~---}\\ \mbox{Level~1~~~~~~~~~~~~1~|~B~|~~~|~C~|~3}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~---~~~~~---}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~{\textbackslash}~~~~~~~/}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~{\textbackslash}~~~~~/}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~---}\\ \mbox{Level~0~~~~~~~~~~~~~~~~0~|~A~|}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~---} \end{flushleft}\end{ttfamily} \end{quote} Notice that the class E, which is in the second level of the hierarchy, precedes the class C, which is in the first level of the hierarchy, i.e. E is more specialized than C, even if it is in a higher level. A lazy programmer can obtain the MRO directly from Python 2.2, since in this case it coincides with the Python 2.3 linearization. 
It is enough to invoke the .mro() method of class A: \begin{quote} \begin{verbatim}>>> A.mro() (<class '__main__.A'>, <class '__main__.B'>, <class '__main__.E'>, <class '__main__.C'>, <class '__main__.D'>, <class '__main__.F'>, <type 'object'>)\end{verbatim} \end{quote} Finally, let me consider the example discussed in the first section, involving a serious order disagreement. In this case, it is straightforward to compute the linearizations of O, X, Y, A and B: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{L[O]~=~O}\\ \mbox{L[X]~=~X~O}\\ \mbox{L[Y]~=~Y~O}\\ \mbox{L[A]~=~A~X~Y~O}\\ \mbox{L[B]~=~B~Y~X~O} \end{flushleft}\end{ttfamily} \end{quote} However, it is impossible to compute the linearization for a class C that inherits from A and B: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{L[C]~=~C~+~merge(AXYO,~BYXO,~AB)}\\ \mbox{~~~~~=~C~+~A~+~merge(XYO,~BYXO,~B)}\\ \mbox{~~~~~=~C~+~A~+~B~+~merge(XYO,~YXO)} \end{flushleft}\end{ttfamily} \end{quote} At this point we cannot merge the lists XYO and YXO, since X is in the tail of YXO whereas Y is in the tail of XYO: therefore there are no good heads and the C3 algorithm stops. Python 2.3 raises an error and refuses to create the class C. %___________________________________________________________________________ \hypertarget{bad-method-resolution-orders}{} \pdfbookmark[1]{Bad Method Resolution Orders}{bad-method-resolution-orders} \subsection*{Bad Method Resolution Orders} A MRO is \emph{bad} when it breaks such fundamental properties as local precedence ordering and monotonicity. In this section, I will show that both the MRO for classic classes and the MRO for new style classes in Python 2.2 are bad. It is easier to start with the local precedence ordering.
Consider the following example: \begin{quote} \begin{verbatim}>>> F=type('Food',(),{'remember2buy':'spam'}) >>> E=type('Eggs',(F,),{'remember2buy':'eggs'}) >>> G=type('GoodFood',(F,E),{}) #under Python 2.3 this is an error\end{verbatim} \end{quote} with inheritance diagram \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{~~~~~~~~~~~~~O}\\ \mbox{~~~~~~~~~~~~~|}\\ \mbox{(buy~spam)~~~F}\\ \mbox{~~~~~~~~~~~~~|~{\textbackslash}}\\ \mbox{~~~~~~~~~~~~~|~E~~~(buy~eggs)}\\ \mbox{~~~~~~~~~~~~~|~/}\\ \mbox{~~~~~~~~~~~~~G}\\ \mbox{}\\ \mbox{~~~~~~(buy~eggs~or~spam~?)} \end{flushleft}\end{ttfamily} \end{quote} We see that class G inherits from F and E, with F \emph{before} E: therefore we would expect the attribute \emph{G.remember2buy} to be inherited from \emph{F.remember2buy} and not from \emph{E.remember2buy}: nevertheless Python 2.2 gives \begin{quote} \begin{verbatim}>>> G.remember2buy #under Python 2.3 this is an error 'eggs'\end{verbatim} \end{quote} This is a breaking of local precedence ordering since the order in the local precedence list, i.e. the list of the parents of G, is not preserved in the Python 2.2 linearization of G: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{L[G,P22]=~G~E~F~object~~~{\#}~F~*follows*~E} \end{flushleft}\end{ttfamily} \end{quote} One could argue that the reason why F follows E in the Python 2.2 linearization is that F is less specialized than E, since F is the superclass of E; nevertheless the breaking of local precedence ordering is quite non-intuitive and error prone. This is particularly true since it is different from the behavior of old style classes: \begin{quote} \begin{verbatim}>>> class F: remember2buy='spam' >>> class E(F): remember2buy='eggs' >>> class G(F,E): pass >>> G.remember2buy 'spam'\end{verbatim} \end{quote} In this case the MRO is GFEF and the local precedence ordering is preserved. As a general rule, hierarchies such as the previous one should be avoided, since it is unclear if F should override E or vice versa.
Python 2.3 solves the ambiguity by raising an exception in the creation of class G, effectively stopping the programmer from generating ambiguous hierarchies. The reason is that the C3 algorithm fails, since the merge \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{merge(FO,EFO,FE)} \end{flushleft}\end{ttfamily} \end{quote} cannot be computed, because F is in the tail of EFO and E is in the tail of FE. The real solution is to design a non-ambiguous hierarchy, i.e. to derive G from E and F (the more specific first) and not from F and E; in this case the MRO is GEF without any doubt. \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{~~~~~~~~~~~O}\\ \mbox{~~~~~~~~~~~|}\\ \mbox{~~~~~~~~~~~F~(spam)}\\ \mbox{~~~~~~~~~/~|}\\ \mbox{(eggs)~~~E~|}\\ \mbox{~~~~~~~~~{\textbackslash}~|}\\ \mbox{~~~~~~~~~~~G}\\ \mbox{~~~~~~~~~~~~~(eggs,~no~doubt)} \end{flushleft}\end{ttfamily} \end{quote} Python 2.3 forces the programmer to write good hierarchies (or, at least, less error-prone ones). On a related note, let me point out that the Python 2.3 algorithm is smart enough to recognize obvious mistakes, such as the duplication of classes in the list of parents: \begin{quote} \begin{verbatim}>>> class A(object): pass >>> class C(A,A): pass # error Traceback (most recent call last): File "<stdin>", line 1, in ? TypeError: duplicate base class A\end{verbatim} \end{quote} Python 2.2 (both for classic classes and new style classes), in this situation, would not raise any exception. Finally, I would like to point out two lessons we have learned from this example: \newcounter{listcnt28} \begin{list}{\arabic{listcnt28}.} { \usecounter{listcnt28} \setlength{\rightmargin}{\leftmargin} } \item {} despite the name, the MRO determines the resolution order of attributes, not only of methods; \item {} the default food for Pythonistas is spam ! (but you already knew that ;-) \end{list} Having discussed the issue of local precedence ordering, let me now consider the issue of monotonicity.
My goal is to show that neither the MRO for classic classes nor that for Python 2.2 new style classes is monotonic. To prove that the MRO for classic classes is non-monotonic is rather trivial: it is enough to look at the diamond diagram: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{~~~C}\\ \mbox{~~/~{\textbackslash}}\\ \mbox{~/~~~{\textbackslash}}\\ \mbox{A~~~~~B}\\ \mbox{~{\textbackslash}~~~/}\\ \mbox{~~{\textbackslash}~/}\\ \mbox{~~~D} \end{flushleft}\end{ttfamily} \end{quote} One easily discerns the inconsistency: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{L[B,P21]~=~B~C~~~~~~~~{\#}~B~precedes~C~:~B's~methods~win}\\ \mbox{L[D,P21]~=~D~A~C~B~C~~{\#}~B~follows~C~~:~C's~methods~win!} \end{flushleft}\end{ttfamily} \end{quote} On the other hand, there are no problems with the Python 2.2 and 2.3 MROs: they both give \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{L[D]~=~D~A~B~C} \end{flushleft}\end{ttfamily} \end{quote} Guido points out in his essay [\hyperlink{id42}{21}] that the classic MRO is not so bad in practice, since one can typically avoid diamonds for classic classes. But all new style classes inherit from object, therefore diamonds are unavoidable and inconsistencies show up in every multiple inheritance graph. The MRO of Python 2.2 makes breaking monotonicity difficult, but not impossible.
The following example, originally provided by Samuele Pedroni, shows that the MRO of Python 2.2 is non-monotonic: \begin{quote} \begin{verbatim}>>> class A(object): pass >>> class B(object): pass >>> class C(object): pass >>> class D(object): pass >>> class E(object): pass >>> class K1(A,B,C): pass >>> class K2(D,B,E): pass >>> class K3(D,A): pass >>> class Z(K1,K2,K3): pass\end{verbatim} \end{quote} Here are the linearizations according to the C3 MRO (the reader should verify these linearizations as an exercise and draw the inheritance diagram ;-) \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{L[A]~=~A~O}\\ \mbox{L[B]~=~B~O}\\ \mbox{L[C]~=~C~O}\\ \mbox{L[D]~=~D~O}\\ \mbox{L[E]~=~E~O}\\ \mbox{L[K1]=~K1~A~B~C~O}\\ \mbox{L[K2]=~K2~D~B~E~O}\\ \mbox{L[K3]=~K3~D~A~O}\\ \mbox{L[Z]~=~Z~K1~K2~K3~D~A~B~C~E~O} \end{flushleft}\end{ttfamily} \end{quote} Python 2.2 gives exactly the same linearizations for A, B, C, D, E, K1, K2 and K3, but a different linearization for Z: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{L[Z,P22]~=~Z~K1~K3~A~K2~D~B~C~E~O} \end{flushleft}\end{ttfamily} \end{quote} It is clear that this linearization is \emph{wrong}, since A comes before D whereas in the linearization of K3 A comes \emph{after} D. In other words, in K3 methods derived by D override methods derived by A, but in Z, which still is a subclass of K3, methods derived by A override methods derived by D! This is a violation of monotonicity. Moreover, the Python 2.2 linearization of Z is also inconsistent with local precedence ordering, since the local precedence list of the class Z is [K1, K2, K3] (K2 precedes K3), whereas in the linearization of Z K2 \emph{follows} K3. These problems explain why the 2.2 rule has been dismissed in favor of the C3 rule. 
\begin{figure}[b]\hypertarget{id40}[19] The thread on python-dev started by Samuele Pedroni: \href{http://mail.python.org/pipermail/python-dev/2002-October/029035.html}{http://mail.python.org/pipermail/python-dev/2002-October/029035.html} \end{figure} \begin{figure}[b]\hypertarget{id41}[20] The paper \emph{A Monotonic Superclass Linearization for Dylan}: \href{http://www.webcom.com/haahr/dylan/linearization-oopsla96.html}{http://www.webcom.com/haahr/dylan/linearization-oopsla96.html} \end{figure} \begin{figure}[b]\hypertarget{id42}[21] Guido van Rossum's essay, \emph{Unifying types and classes in Python 2.2}: \href{http://www.python.org/2.2.2/descrintro.html}{http://www.python.org/2.2.2/descrintro.html} \end{figure} \begin{figure}[b]\hypertarget{id43}[22] The (in)famous book on metaclasses, \emph{Putting Metaclasses to Work}: Ira R. Forman, Scott Danforth, Addison-Wesley 1999 (out of print, but probably still available on \href{http://www.amazon.com}{http://www.amazon.com}) \end{figure} %___________________________________________________________________________ \hypertarget{understanding-the-method-resolution-order}{} \pdfbookmark[1]{Understanding the Method Resolution Order}{understanding-the-method-resolution-order} \subsection*{Understanding the Method Resolution Order} The MRO of any given (new style) Python class is given by the special attribute \texttt{{\_}{\_}mro{\_}{\_}}. Notice that since Python is an extremely dynamic language it is possible to delete and to generate whole classes at run time, therefore the MRO is a dynamic concept. For instance, let me show how it is possibile to remove a class from my paleoanthropological hierarchy: for instance I can replace the last class 'HomoSapiensSapiens' with 'HomoSapiensNeardenthalensis' (changing a class in the middle of the hierarchy would be more difficult). 
The following lines do the job dynamically: \begin{quote} \begin{verbatim}>>> from oopp import * >>> del HomoSapiensSapiens >>> class HomoSapiensNeardenthalensis(HomoSapiens): ... def can(self): ... super(self.__this,self).can() ... print " - make something" >>> reflective(HomoSapiensNeardenthalensis) >>> HomoSapiensNeardenthalensis().can() HomoSapiensNeardenthalensis can: - make tools - make abstractions - make something\end{verbatim} \end{quote} In this case the MRO of 'HomoSapiensNeardenthalensis', i.e. the list of all its ancestors, is \begin{quote} \begin{verbatim}>>> HomoSapiensNeardenthalensis.__mro__ [,, , , , ]\end{verbatim} \end{quote} The \texttt{{\_}{\_}mro{\_}{\_}} attribute gives the \emph{linearization} of the class, i.e. the ordered list of its ancestors, starting from the class itself and ending with object. The linearization of a class is essential in order to specify the resolution order of methods and attributes, i.e. the Method Resolution Order (MRO). In the case of single inheritance hierarchies, such the paleonthropological example, the MRO is pretty obvious; on the contrary it is a quite non-trivial concept in the case of multiple inheritance hierarchies. For instance, let me reconsider my first example of multiple inheritance, the \texttt{NonInstantiableClock} class, inheriting from 'NonInstantiable' and 'Clock'. 
I may represent the hierarchy with the following inheritance graph: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{~~~~~~~~~~--~~~object~~~--~}\\ \mbox{~~~~~~~~/~~~~~({\_}{\_}new{\_}{\_})~~~~{\textbackslash}}\\ \mbox{~~~~~~~/~~~~~~~~~~~~~~~~~~~~{\textbackslash}}\\ \mbox{~~~~~~/~~~~~~~~~~~~~~~~~~~~~~{\textbackslash}}\\ \mbox{~~~Clock~~~~~~~~~~~~~~~~NonInstantiable}\\ \mbox{(get{\_}time)~~~~~~~~~~~~~~~~~({\_}{\_}new{\_}{\_})}\\ \mbox{~~~~~{\textbackslash}~~~~~~~~~~~~~~~~~~~~~~~~~/}\\ \mbox{~~~~~~{\textbackslash}~~~~~~~~~~~~~~~~~~~~~~~/}\\ \mbox{~~~~~~~{\textbackslash}~~~~~~~~~~~~~~~~~~~~~/}\\ \mbox{~~~~~~~~{\textbackslash}~~~~~~~~~~~~~~~~~~~/}\\ \mbox{~~~~~~~~~{\textbackslash}~~~~~~~~~~~~~~~~~/}\\ \mbox{~~~~~~~~~NonInstantiableClock~~~}\\ \mbox{~~~~~~~~~~(get{\_}time,{\_}{\_}new{\_}{\_})} \end{flushleft}\end{ttfamily} \end{quote} The class \texttt{Clock} define a \texttt{get{\_}time} method, whereas the class \texttt{NonInstantiable} overrides the \texttt{{\_}{\_}new{\_}{\_}} method of the \texttt{object} class; the class \texttt{NonInstantiableClock} inherits \texttt{get{\_}time} from 'Clock' and \texttt{{\_}{\_}new{\_}{\_}} from 'NonInstantiable'. The linearization of 'NonInstantiableClock' is \begin{quote} \begin{verbatim}>>> NonInstantiableClock.mro() [, , , ]\end{verbatim} \end{quote} In particular, since 'NonInstantiable' precedes 'object', its \texttt{{\_}{\_}new{\_}{\_}} method overrides the \texttt{object} new method. However, with the MRO used before Python 2.2, the linearization would have been \texttt{NonInstantiableClock, Clock, object, NonInstantiable, object} and the \texttt{{\_}{\_}new{\_}{\_}} method of object would have (hypothetically, of course, since before Python 2.2 there was not \texttt{{\_}{\_}new{\_}{\_}} method! ;-) overridden the \texttt{{\_}{\_}new{\_}{\_}} method of \texttt{NonInstantiable}, therefore \texttt{NonInstantiableClock} would have lost the property of being non-instantiable! 
This simple example shows that the choice of a correct Method Resolution Order is far from being obvious in general multiple inheritance hierarchies. After a false start in Python 2.2 (with a MRO failing in some subtle cases), Python 2.3 decided to adopt the so-called C3 MRO, invented by people working on Dylan (even if Dylan itself uses the MRO of Common Lisp CLOS). Since this is quite a technical matter, I refer the interested reader to appendix 2 for a full discussion of the C3 algorithm. Here, I prefer to point out how the built-in \texttt{super} object works in multiple inheritance situations. To this aim, it is convenient to define a utility function that retrieves the ancestors of a given class with respect to the MRO of one of its subclasses: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~ancestor(C,S=None):}\\ \mbox{~~~~"""Returns~the~ancestors~of~the~first~argument~with~respect~to~the~}\\ \mbox{~~~~MRO~of~the~second~argument.~If~the~second~argument~is~None,~then~}\\ \mbox{~~~~returns~the~MRO~of~the~first~argument."""}\\ \mbox{~~~~if~C~is~object:}\\ \mbox{~~~~~~~~raise~TypeError("There~is~no~superclass~of~object")}\\ \mbox{~~~~elif~S~is~None~or~S~is~C:}\\ \mbox{~~~~~~~~return~list(C.{\_}{\_}mro{\_}{\_})}\\ \mbox{~~~~elif~issubclass(S,C):~{\#}~typical~case}\\ \mbox{~~~~~~~~mro=list(S.{\_}{\_}mro{\_}{\_})}\\ \mbox{~~~~~~~~return~mro[mro.index(C):]~{\#}~compute~the~ancestors~from~the~MRO~of~S}\\ \mbox{~~~~else:}\\ \mbox{~~~~~~~~raise~TypeError("S~must~be~a~subclass~of~C")}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Let me show how the function \texttt{ancestor} works. Consider the class \texttt{Clock} in isolation: then its direct superclass, i.e.
the first ancestor, is \texttt{object}, \begin{quote} \begin{verbatim}>>> from oopp import * >>> ancestor(Clock)[1] \end{verbatim} \end{quote} therefore \texttt{super(Clock).{\_}{\_}new{\_}{\_}} retrieves the \texttt{object.{\_}{\_}new{\_}{\_}} method: \begin{quote} \begin{verbatim}>>> super(Clock).__new__ \end{verbatim} \end{quote} Consider now the \texttt{Clock} class together with its subclass \texttt{NonInstantiableClock}: in this case the first ancestor of \texttt{Clock}, \emph{with respect to the MRO of 'NonInstantiableClock'} is \texttt{NonInstantiable} \begin{quote} \begin{verbatim}>>> ancestor(Clock,NonInstantiableClock)[1] \end{verbatim} \end{quote} Therefore \texttt{super(Clock,NonInstantiableClock).{\_}{\_}new{\_}{\_}} retrieves the \texttt{NonInstantiable.{\_}{\_}new{\_}{\_}} method: \begin{quote} \begin{verbatim}>>> super(Clock,NonInstantiableClock).__new__ >>> NonInstantiable.__new__ \end{verbatim} \end{quote} It must be pointed out that \texttt{super(C,S)} is equivalent but not the same than \texttt{ancestor(C,S)[1]}, since it does not return the superclass: it returns a super object, instead: \begin{quote} \begin{verbatim}>>> super(Clock,NonInstantiableClock) , >\end{verbatim} {\#}{\textless}oopp.py{\textgreater} {\#}class Super(super): {\#} def {\_}{\_}init{\_}{\_}(self,C,S=None): {\#} super(Super,self).{\_}{\_}init{\_}{\_}(C,S) {\#} self.{\_}{\_}name{\_}{\_}=''Super({\%}s)`` {\%} C.{\_}{\_}name{\_}{\_} {\#}{\textless}/oopp.py{\textgreater} \end{quote} Finally, there is little quirk of super: \begin{quote} \begin{verbatim}>>> class C(PrettyPrinted): pass >>> s=super(C,C()) >>> s.__str__()\end{verbatim} \end{quote} but \begin{quote} \begin{verbatim}>>> str(s) # idem for print s ", >"\end{verbatim} \end{quote} Idem for non-pre-existing methods: \begin{quote} \begin{verbatim}>>> class D(list): pass ... >>> s=super(D,D()) >>> s.__len__() 0 >>> len(s) #error Traceback (most recent call last): File "", line 1, in ? 
TypeError: len() of unsized object\end{verbatim} \end{quote} The same problem comes with \texttt{{\_}{\_}getattr{\_}{\_}}: \begin{quote} \begin{verbatim}>>> class E(object): ... def __getattr__(self,name): ... if name=='__len__': return lambda:0 ... >>> e=E() >>> e.__len__() 0 >>> len(e) # error Traceback (most recent call last): File "<stdin>", line 1, in ? TypeError: len() of unsized object\end{verbatim} \end{quote} %___________________________________________________________________________ \hypertarget{counting-instances}{} \pdfbookmark[1]{Counting instances}{counting-instances} \subsection*{Counting instances} \begin{quote} \begin{flushleft} \emph{Everything~should~be~built~top-down,~except~the~first~time.}~\\ --~Alan~Perlis \end{flushleft} \end{quote} Multiple inheritance adds a step further to the bottom-up philosophy and it makes appealing the idea of creating classes with the only purpose of being derived. Whereas in the top-down approach one starts with full featured standalone classes, to be further refined, in the mix-in approach one starts with bare bone classes, providing very simple or even trivial features, with the purpose of providing basic reusable components in multiple inheritance hierarchies. At the very end, the idea is to generate a library of \emph{mixin} classes, to be composed with other classes. We already saw a couple of examples of mixin classes: 'NonInstantiable' and 'Customizable'. In this paragraph I will show three other examples: 'WithCounter', 'Singleton' and 'AvoidDuplication'. A common requirement for a class is the ability to count the number of its instances. This is quite an easy problem: it is enough to increment a counter each time an instance of that class is initialized. However, this idea can be implemented in the wrong way, i.e. naively one could implement counting capabilities in a class without such capabilities by modifying the \texttt{{\_}{\_}init{\_}{\_}} method explicitly in the original source code.
A better alternative is to follow the bottom-up approach and to implement the counting feature in a separate mix-in class: then the feature can be added to the original class via multiple inheritance, without touching the source. Moreover, the counter class becomes a reusable component that can be useful for other problems, too. In order to use the mix-in approach, the \texttt{{\_}{\_}new{\_}{\_}} method of the counter class must be cooperative, and preferably via an anonymous super call. \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~WithCounter(object):~}\\ \mbox{~~~~"""Mixin~class~counting~the~total~number~of~its~instances~and~storing~}\\ \mbox{~~~~~it~in~the~class~attribute~counter."""}\\ \mbox{}\\ \mbox{~~~~counter=0~{\#}~class~attribute~(or~static~attribute~in~C++/Java~terms)}\\ \mbox{~}\\ \mbox{~~~~def~{\_}{\_}new{\_}{\_}(cls,*args,**kw):}\\ \mbox{~~~~~~~~cls.counter+=1~{\#}~increments~the~class~attribute}\\ \mbox{~~~~~~~~return~super(cls.{\_}{\_}this,cls).{\_}{\_}new{\_}{\_}(cls,*args,**kw)~~}\\ \mbox{~~~~~~~~{\#}anonymous~cooperative~call~to~the~superclass's~method~{\_}{\_}new{\_}{\_}}\\ \mbox{}\\ \mbox{reflective(WithCounter)}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Each time an instance of 'WithCounter' is initialized, the class attribute 'counter' is incremented and when 'WithCounter' is composed through multiple inheritance, its '{\_}{\_}new{\_}{\_}' method cooperatively invokes the \texttt{{\_}{\_}new{\_}{\_}} method of the other components. For instance, I can use 'WithCounter' to implement a 'Singleton', i.e. a class that can have only one instance.
This kind of class can be obtained as follows: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~Singleton(WithCounter):}\\ \mbox{~~~~"If~you~inherit~from~me,~you~can~only~have~one~instance"}\\ \mbox{~~~~def~{\_}{\_}new{\_}{\_}(cls,*args,**kw):}\\ \mbox{~~~~~~~~if~cls.counter==0:~{\#}first~call}\\ \mbox{~~~~~~~~~~~~cls.instance=super(cls.{\_}{\_}this,cls).{\_}{\_}new{\_}{\_}(cls,*args,**kw)}\\ \mbox{~~~~~~~~return~cls.instance}\\ \mbox{}\\ \mbox{reflective(Singleton)}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} As an application, I can create a class \texttt{SingleClock} that inherits from \texttt{Clock} \emph{and} from \texttt{Singleton}. This means that \texttt{SingleClock} is both a 'Clock' and a 'Singleton', i.e. there can be only one clock: \begin{quote} \begin{verbatim}>>> from oopp import Clock,Singleton >>> class SingleClock(Clock,Singleton): pass ... >>> clock1=SingleClock() >>> clock2=SingleClock() >>> clock1 is clock2 True\end{verbatim} \end{quote} Instantiating many clocks is apparently possible (i.e. no error message is given) but you always obtain the same instance. This makes sense, since there is only one time on the system and a single clock is enough. A variation of the 'Singleton' is a class that generates a new instance only when a certain condition is satisfied. Suppose for instance one has a 'Disk' class, to be instantiated with the syntax \texttt{Disk(xpos,ypos,radius)}. It is clear that two disks with the same radius and the same position in the Cartesian plane are essentially the same disk (assuming there are no additional attributes such as the color). Therefore it is a waste of memory to instantiate two separate objects to describe the same disk. To solve this problem, one possibility is to store in a list the calling arguments.
When it is time to instantiate a new object with arguments args = xpos, ypos, radius, Python should check if a disk with these arguments has already been instantiated: in this case that disk should be returned, not a new one. This logic can be elegantly implemented in a mix-in class such as the following (compare with the \texttt{withmemory} wrapper in chapter 2): \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~AvoidDuplication(object):}\\ \mbox{~~~~def~{\_}{\_}new{\_}{\_}(cls,*args,**kw):}\\ \mbox{~~~~~~~~return~super(cls.{\_}{\_}this,cls).{\_}{\_}new{\_}{\_}(cls,*args,**kw)~}\\ \mbox{~~~~{\_}{\_}new{\_}{\_}=withmemory({\_}{\_}new{\_}{\_})~{\#}~collects~the~calls~in~{\_}{\_}new{\_}{\_}.result}\\ \mbox{}\\ \mbox{reflective(AvoidDuplication)}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Notice that 'AvoidDuplication' is introduced with the only purpose of giving its functionality to 'Disk': in order to reach this goal, it is enough to derive 'Disk' from this class and our previously introduced 'GeometricFigure' class by writing something like \begin{quote} \begin{verbatim}>>> from oopp import * >>> class Disk(GeometricFigure,AvoidDuplication): ... def __init__(self,xpos,ypos,radius): ... return super(Disk,self).__init__('(x-x0)**2+(y-y0)**2 <= r**2', ...
x0=xpos,y0=ypos,r=radius)\end{verbatim} \end{quote} Now, if we create a disk \begin{quote} \begin{verbatim}>>> c1=Disk(0,0,10) #creates a disk of radius 10\end{verbatim} \end{quote} it is easy enough to check that trying to instantiate a new disk with the \emph{same} arguments returns the old disk: \begin{quote} \begin{verbatim}>>> c2=Disk(0,0,10) #returns the *same* old disk >>> c1 is c2 True\end{verbatim} \end{quote} Here, everything works, because through the cooperative \texttt{super} mechanism, \texttt{Disk.{\_}{\_}init{\_}{\_}} calls \texttt{AvoidDuplication.{\_}{\_}init{\_}{\_}} that calls \texttt{GeometricFigure.{\_}{\_}init{\_}{\_}} that in turn initializes the disk. Inverting the order of 'AvoidDuplication' and 'GeometricFigure' would cause a disaster, since \texttt{GeometricFigure.{\_}{\_}init{\_}{\_}} would override \texttt{AvoidDuplication.{\_}{\_}init{\_}{\_}}. Alternatively, one could use the object factory 'Makeobj' implemented in chapter 3: \begin{quote} \begin{verbatim}>>> class NonDuplicatedFigure(GeometricFigure,AvoidDuplication): pass >>> makedisk=Makeobj(NonDuplicatedFigure,'(x-x0)**2/4+(y-y0)**2 <= r**2') >>> disk1=makedisk(x0=38,y0=7,r=5) >>> disk2=makedisk(x0=38,y0=7,r=5) >>> disk1 is disk2 True\end{verbatim} \end{quote} Remark: it is interesting to notice that the previous approach would not work for keyword arguments, directly, since dictionaries are unhashable. %___________________________________________________________________________ \hypertarget{the-pizza-shop-example}{} \pdfbookmark[1]{The pizza-shop example}{the-pizza-shop-example} \subsection*{The pizza-shop example} Now it is time to give a non-trivial example of multiple inheritance with cooperative and non-cooperative classes. The point is that multiple inheritance can easily lead to complicated hierarchies, where the resolution order of methods is far from being obvious and actually can give bad surprises.
To explain the issue, let me extend the program for the pizza-shop owner of chapter 4, by following the bottom-up approach and using anonymous cooperative super calls. In this approach, one starts from the simplest thing. It is clear that the pizza-shop owner has interest in recording all the pizzas he sell. To this aim, he needs a class providing logging capabilities: each time a new instance is created, its features are stored in a log file. In order to count the total number of instances, 'WithLogger' must derive from the 'WithCounter' class. In order to have a nicely printed message, 'WithLogger' must derive from 'PrettyPrinted'. Finally, since 'WithLogger' must be a general purpose class that I will reuse in other problem as a mixin class, it must be cooperative. 'WithLogger' can be implemented as follows: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~WithLogger(WithCounter,PrettyPrinted):}\\ \mbox{~~~~"""WithLogger~inherits~from~WithCounter~the~'count'~class~attribute;~}\\ \mbox{~~~~moreover~it~inherits~'{\_}{\_}str{\_}{\_}'~from~PrettyPrinted"""}\\ \mbox{~~~~logfile=sys.stdout~{\#}default}\\ \mbox{~~~~verboselog=False~{\#}default}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(self,*args,**kw):~}\\ \mbox{~~~~~~~~super(self.{\_}{\_}this,self).{\_}{\_}init{\_}{\_}(*args,**kw)~{\#}~cooperative}\\ \mbox{~~~~~~~~dic=attributes(self)~{\#}~non-special~attributes~dictionary}\\ \mbox{~~~~~~~~print~>>~self.logfile,'*'*77}\\ \mbox{~~~~~~~~print~>>~self.logfile,~time.asctime()}\\ \mbox{~~~~~~~~print~>>~self.logfile,~"{\%}s.~Created~{\%}s"~{\%}~(type(self).counter,self)}\\ \mbox{~~~~~~~~if~self.verboselog:}\\ \mbox{~~~~~~~~~~~~print~>>~self.logfile,"with~accessibile~non-special~attributes:"}\\ \mbox{~~~~~~~~~~~~if~not~dic:~print~>>~self.logfile,"",}\\ \mbox{~~~~~~~~~~~~else:~print~>>~self.logfile,~pretty(dic)}\\ \mbox{}\\ \mbox{reflective(WithLogger)}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Here I could well use 
\texttt{super(self.{\_}{\_}this,self).{\_}{\_}init{\_}{\_}(*args,**kw)} instead of \texttt{super(self.{\_}{\_}this,self).{\_}{\_}init{\_}{\_}(*args,**kw)}, nevertheless the standard \texttt{super} works in this case and I can use it with better performances. Thanks to the power of multiple inheritance, we may give logging features to the 'CustomizablePizza' class defined in chapter 4 with just one line of code: \begin{quote} \begin{verbatim}>>> from oopp import * >>> class Pizza(WithLogger,CustomizablePizza): ... "Notice, WithLogger is before CustomizablePizza" >>> Pizza.With(toppinglist=['tomato'])('small') **************************************************************************** Sat Feb 22 14:54:44 2003 1. Created <__main__.Pizza object at 0x816927c>\end{verbatim} \end{quote} It is also possible to have a more verbose output: \begin{quote} \begin{verbatim}>>> Pizza.With(verboselog=True) >>> Pizza('large') **************************************************************************** Sat Feb 22 14:59:51 2003 1. Created with accessibile non-special attributes: With = > baseprice = 1 count = 2 formatstring = %s logfile = ', mode 'w' at 0x402c2058> price = > size = large sizefactor = {'small': 1, 'large': 3, 'medium': 2} topping_unit_price = 0.5 toppinglist = ['tomato'] toppings_price = > verboselog = True with = > <__main__.Pizza object at 0x401ce7ac>\end{verbatim} \end{quote} However, there is a problem here, since the output is '{\textless}Pizza{\textgreater}' and not the nice 'large pizza with tomato, cost {\$} 4.5' that we would expect from a child of 'CustomizablePizza'. 
The solution to the puzzle is given by the MRO: \begin{quote} \begin{verbatim}>>> Pizza.mro() [, , , , , , , ]\end{verbatim} \end{quote} The inheritance graph is rather complicated: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~~object~~7}\\ \mbox{}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~/~~~~~/~~~{\textbackslash}~~~~~{\textbackslash}}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~/~~~~~~/~~~~~{\textbackslash}~~~~~~{\textbackslash}}\\ \mbox{~~~~~~~~~~~~~~~~~~~/~~~~~~~/~~~~~~~{\textbackslash}~~~~~~~{\textbackslash}}\\ \mbox{~~~~~~~~~~~~~~~~~/~~~~~~~~/~~~~~~~~~{\textbackslash}~~~~~~~~{\textbackslash}}\\ \mbox{~~~~~~~~~~~~~~~/~~~~~~~~~/~~~~~~~~~~~{\textbackslash}~~~~~~~~~{\textbackslash}}\\ \mbox{~~~~~~~~~~~~~/~~~~~~~~~~/~~~~~~~~~~~~~{\textbackslash}~~~~~~~~~~{\textbackslash}}\\ \mbox{~~~~~~~~~~~/~~~~~~~~~~~/~~~~~~~~~~~~~~~{\textbackslash}~~~~~~~~~~~{\textbackslash}}\\ \mbox{~~~~~~~~~/~~~~~~~~~~~~/~~~~~~~~~~~~~~~~~{\textbackslash}~~~~~~~~~~~~{\textbackslash}}\\ \mbox{~~~~~~~/~~~~~~~~~~~~~/~~~~~~~~~~~~~~~~~~~{\textbackslash}~~~~~~~~~~~~~{\textbackslash}}\\ \mbox{2~~WithCounter~~~PrettyPrinted~3~~~~GenericPizza~5~~Customizable~6}\\ \mbox{~~({\_}{\_}new{\_}{\_})~~~~({\_}{\_}str{\_}{\_},{\_}{\_}init{\_}{\_})~~~~~~({\_}{\_}str{\_}{\_})~~~~~~~/~~~~~~~~~~~~~~}\\ \mbox{~~~~~~{\textbackslash}~~~~~~~~~~~~/~~~~~~~~~~~~~~~~~~~~~~~/~~~~~~~~/~~~~}\\ \mbox{~~~~~~~{\textbackslash}~~~~~~~~~~/~~~~~~~~~~~~~~~~~~~~~~~/~~~~~~/~~~~~~~~~}\\ \mbox{~~~~~~~~{\textbackslash}~~~~~~~~/~~~~~~~~~~~~~~~~~~~~~~~/~~~~/}\\ \mbox{~~~~~~~~~{\textbackslash}~~~~~~/~~~~~~~~~~~~~~~~~~~~~~~/~~/}\\ \mbox{~~~~~~~~~~{\textbackslash}~~~~/~~~~~~~~~~~~CustomizablePizza~~4}\\ \mbox{~~~~~~~~~~~{\textbackslash}~~/~~~~~~~~~~~~~~~~~~~~~~/~~~~~~~}\\ \mbox{~~~1~~~~~WithLogger~~~~~~~~~~~~~~~~/}\\ \mbox{~~~~~~~~~({\_}{\_}init{\_}{\_})~~~~~~~~~~~~~~/~~~~~~~~}\\ \mbox{~~~~~~~~~~~~~~{\textbackslash}~~~~~~~~~~~~~~~~/}\\ \mbox{~~~~~~~~~~~~~~~{\textbackslash}~~~~~~~~~~~~~/}\\ 
\mbox{~~~~~~~~~~~~~~~~{\textbackslash}~~~~~~~~~~/}\\ \mbox{~~~~~~~~~~~~~~~~~{\textbackslash}~~~~~~~/}\\ \mbox{~~~~~~~~~~~~~~~~~~{\textbackslash}~~~~/}\\ \mbox{}\\ \mbox{~~~~~~~~~~~~~~~~~~Pizza~~0} \end{flushleft}\end{ttfamily} \end{quote} As we see, the precedence in the resolution of methods is far from being trivial. It is denoted in the graph with numbers from 0 to 7: first the methods of 'Pizza' (level 0), then the methods of 'WithLogger' (level 1), then the methods of 'WithCounter' (level 2), then the methods of 'PrettyPrinted' (level 3), then the methods of 'CustomizablePizza' (level 4), then the methods of 'GenericPizza' (level 5), then the methods of 'Customizable' (level 6), finally the 'object' methods (level 7). The reason why the MRO is so can be understood by studying appendix 1. We see that the \texttt{{\_}{\_}init{\_}{\_}} method of 'WithLogger' and the \texttt{{\_}{\_}new{\_}{\_}} method of 'WithCounter' are cooperative. \texttt{WithLogger.{\_}{\_}init{\_}{\_}} calls \texttt{WithCounter.{\_}{\_}init{\_}{\_}} that is inherited from \texttt{CustomizablePizza.{\_}{\_}init{\_}{\_}} which is not cooperative, but this is not dangerous since \texttt{CustomizablePizza.{\_}{\_}init{\_}{\_}} does not need to call any other \texttt{{\_}{\_}init{\_}{\_}}. However, \texttt{PrettyPrinted.{\_}{\_}str{\_}{\_}} and \texttt{GenericPizza.{\_}{\_}str{\_}{\_}} are not cooperative and since 'PrettyPrinted' precedes 'GenericPizza', the \texttt{GenericPizza.{\_}{\_}str{\_}{\_}} method is overridden, which is bad. If \texttt{WithLogger.{\_}{\_}init{\_}{\_}} and \texttt{WithCounter.{\_}{\_}new{\_}{\_}} were not cooperative, they would badly break the program. The message is: when you inherit from both cooperative and non-cooperative classes, put cooperative classes first. They will be fair and will not blindly override methods of the non-cooperative classes.
With multiple inheritance you can reuse old code a lot; however, the price to pay is to have a non-trivial hierarchy. If we had known from the beginning that 'Pizza' needed a 'WithLogger', a 'WithCounter' and the ability to be 'Customizable', we could have put everything in a single class. The problem is that in real life one never knows ;) Fortunately, Python dynamism allows one to correct design mistakes. Remark: in all text books about inheritance, the authors always stress that inheritance should be used as an ``is-a'' relation, not a ``has-a'' relation. In spite of this fact, I have decided to implement the concept of having a logger (or a counter) via a mixin class. One should not blindly believe text books ;) %___________________________________________________________________________ \hypertarget{fixing-wrong-hierarchies}{} \pdfbookmark[1]{Fixing wrong hierarchies}{fixing-wrong-hierarchies} \subsection*{Fixing wrong hierarchies} A typical metaprogramming technique is the run-time modification of classes. As I said in a previous chapter, this feature can confuse the programmer and should not be abused (in particular it should not be used as a replacement of inheritance!); nevertheless, there are applications where the ability to modify classes at run time is invaluable: for instance, it can be used to correct design mistakes. In this case we would like the \texttt{{\_}{\_}str{\_}{\_}} method of 'PrettyPrinted' to be overridden by \texttt{GenericPizza.{\_}{\_}str{\_}{\_}}. Naively, this can be solved by putting 'WithLogger' after 'GenericPizza'. Unfortunately, doing so would cause \texttt{GenericPizza.{\_}{\_}init{\_}{\_}} to override \texttt{WithLogger.{\_}{\_}init{\_}{\_}}, thereby losing logging capabilities, unless countermeasures are taken. A valid countermeasure could be to replace the non-cooperative \texttt{GenericPizza.{\_}{\_}init{\_}{\_}} with a cooperative one.
This can miraculously be done at run time in a few lines of code: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~coop{\_}init(self,size):~{\#}~cooperative~{\_}{\_}init{\_}{\_}~for~GenericPizza}\\ \mbox{~~~~self.size=size}\\ \mbox{~~~~super(self.{\_}GenericPizza{\_}{\_}this,self).{\_}{\_}init{\_}{\_}(size)}\\ \mbox{}\\ \mbox{GenericPizza.{\_}{\_}init{\_}{\_}=coop{\_}init~{\#}~replace~the~old~{\_}{\_}init{\_}{\_}}\\ \mbox{}\\ \mbox{reflective(GenericPizza)~{\#}~define~GenericPizza.{\_}{\_}this}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Notice the usage of the fully qualified private attribute \texttt{self.{\_}GenericPizza{\_}{\_}this} inside \texttt{coop{\_}init}: since this function is defined outside any class, the automatic mangling mechanism cannot work and has to be implemented by hand. Notice also that \texttt{super(self.{\_}GenericPizza{\_}{\_}this,self)} could be replaced by \texttt{super(GenericPizza,self)}; however the simpler approach is less safe against possible future manipulations of the hierarchy. Suppose, for example, we want to create a copy of the hierarchy with the same name but slightly different features (actually, in chapter 8 we will implement a traced copy of the pizza hierarchy, useful for debugging purposes): then, using \texttt{super(GenericPizza,self)} would raise an error, since self would be an instance of the traced hierarchy and \texttt{GenericPizza} the original nontraced class. Using the form \texttt{super(self.{\_}GenericPizza{\_}{\_}this,self)} and making \texttt{self.{\_}GenericPizza{\_}{\_}this} point to the traced 'GenericPizza' class (actually this will happen automatically), the problem goes away.
Now everything works if 'WithLogger' is put after 'CustomizablePizza': \begin{quote} \begin{verbatim}>>> from oopp import * >>> class PizzaWithLog(CustomizablePizza,WithLogger): pass >>> PizzaWithLog.With(toppinglist=['tomato'])('large') **************************************************************************** Sun Apr 13 16:19:12 2003 1. Created large pizza with tomato, cost $ 4.5 \end{verbatim} \end{quote} The log correctly says \texttt{Created large pizza with tomato, cost {\$} 4.5} and not \texttt{Created {\textless}Pizza{\textgreater}} as before, since now \texttt{GenericPizza.{\_}{\_}str{\_}{\_}} overrides \texttt{PrettyPrinted.{\_}{\_}str{\_}{\_}}. Moreover, the hierarchy is logically better organized: \begin{quote} \begin{verbatim}>>> PizzaWithLog.mro() [, , , , , , , ]\end{verbatim} \end{quote} I leave as an exercise for the reader to make the \texttt{{\_}{\_}str{\_}{\_}} methods cooperative ;) Obviously, in this example it would have been better to correct the original hierarchy, by leaving 'Beautiful' instantiable from the beginning (that's why I said that 'Beautiful' is an example of a wrong mix-in class): nevertheless, sometimes, one has to do with wrong hierarchies written by others, and it can be a pain to fix them, both directly by modifying the original source code, and indirectly by inheritance, since one must change all the names, in order to distinguish the original classes from the fixed ones. In those cases Python dynamism can save your life. This also allows you to enhance original classes which are not wrong, but that simply don't do something you want to implement. Modifying classes at run-time can be trivial, as in the examples I have shown here, but can also be rather tricky, as in this example: \begin{quote} \begin{verbatim}>>> from oopp import PrettyPrinted >>> class PrettyPrintedWouldBe(object): __str__ = PrettyPrinted.__str__ >>> print PrettyPrintedWouldBe() #error Traceback (most recent call last): File "<stdin>", line 1, in ?
TypeError: unbound method __str__() must be called with PrettyPrinted instance as first argument (got nothing instead)\end{verbatim} \end{quote} As the error message says, the problem here, is that the \texttt{PrettyPrinted.{\_}{\_}str{\_}{\_}} unbound method, has not received any argument. This is because in this form \texttt{PrettyPrintedWouldBe.{\_}{\_}str{\_}{\_}} has been defined as an attribute, not as a real method. The solution is to write \begin{quote} \begin{verbatim}>>> class PrettyPrintedWouldBe(object): ... __str__ = PrettyPrinted.__dict__['__str__'] ... >>> print PrettyPrintedWouldBe() # now it works \end{verbatim} \end{quote} This kind of run-time modifications does not work when private variables are involved: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~C(object):}\\ \mbox{~~~~{\_}{\_}x='C.{\_}{\_}init{\_}{\_}'}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(self):~}\\ \mbox{~~~~~~~~print~self.{\_}{\_}x~{\#}~okay}\\ \mbox{}\\ \mbox{class~D(object):}\\ \mbox{~~~~{\_}{\_}x='D.{\_}{\_}init{\_}{\_}'}\\ \mbox{~~~~{\_}{\_}init{\_}{\_}=C.{\_}{\_}dict{\_}{\_}['{\_}{\_}init{\_}{\_}']~{\#}~error}\\ \mbox{}\\ \mbox{class~New:}\\ \mbox{~~~~class~C(object):}\\ \mbox{~~~~~~~~{\_}{\_}x='New.C.{\_}{\_}init{\_}{\_}'}\\ \mbox{~~~~~~~~{\_}{\_}init{\_}{\_}=C.{\_}{\_}dict{\_}{\_}['{\_}{\_}init{\_}{\_}']~{\#}~okay}\\ \mbox{}\\ \mbox{C()}\\ \mbox{try:~D()}\\ \mbox{except~AttributeError,e:~print~e}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Gives as result \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{C.{\_}{\_}init{\_}{\_}}\\ \mbox{'D'~object~has~no~attribute~'{\_}C{\_}{\_}x'}\\ \mbox{New.C.{\_}{\_}init{\_}{\_}} \end{flushleft}\end{ttfamily} \end{quote} The problem is that when \texttt{C.{\_}{\_}dict{\_}{\_}['{\_}{\_}init{\_}{\_}']} is compiled (to byte-code) \texttt{self.{\_}{\_}x} is expanded to \texttt{self.{\_}C{\_}{\_}x}. 
However, when one invokes \texttt{D.{\_}{\_}init{\_}{\_}}, a D-object is passed, which has a \texttt{self.{\_}D{\_}{\_}x} attribute, but not a \texttt{self.{\_}C{\_}{\_}x} attribute (unless 'D' is a subclass of 'C'). Fortunately, Python wisdom \begin{quote} \emph{Namespaces are one honking great idea -- let's do more of those!} \end{quote} suggests the right solution: to use a new class with the \emph{same name} as the old one, but in a different namespace, in order to avoid confusion. The simplest way to generate a new namespace is to declare a new class (the class 'New' in this example): then 'New.C' becomes an inner class of 'New'. Since it has the same name as the original class, private variables are correctly expanded and one can freely exchange methods from 'C' to 'New.C' (and vice versa, too). %___________________________________________________________________________ \hypertarget{modifying-hierarchies}{} \pdfbookmark[1]{Modifying hierarchies}{modifying-hierarchies} \subsection*{Modifying hierarchies} \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{def~mod(cls):~return~cls}\\ \mbox{}\\ \mbox{class~New:~pass}\\ \mbox{}\\ \mbox{for~c~in~HomoSapiensSapiens.{\_}{\_}mro{\_}{\_}:}\\ \mbox{~~~~setattr(New,c.{\_}{\_}name{\_}{\_},mod(c))} \end{flushleft}\end{ttfamily} \end{quote} %___________________________________________________________________________ \hypertarget{inspecting-python-code}{} \pdfbookmark[1]{Inspecting Python code}{inspecting-python-code} \subsection*{Inspecting Python code} This section shows how to inspect a class, by retrieving useful information about its attributes. A first possibility is to use the standard \texttt{help} function. The problem with this approach is that \texttt{help} gives too much information.
\begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{{\#}plaindata=}\\ \mbox{plainmethod=lambda~m:m~{\#}identity~function}\\ \mbox{}\\ \mbox{class~Get(object):}\\ \mbox{~~~~"""Invoked~as~Get(cls)(xxx)~where~xxx~=~staticmethod,~classmethod,}\\ \mbox{~~~~property,~plainmethod,~plaindata,~returns~the~corresponding~}\\ \mbox{~~~~attributes~as~a~keyword~dictionary.~It~works~by~internally~calling~}\\ \mbox{~~~~the~routine~inspect.classify{\_}class{\_}attrs.~Notice~that~data}\\ \mbox{~~~~attributes~with~double~underscores~are~not~retrieved~}\\ \mbox{~~~~(this~is~by~design)."""}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(self,cls):}\\ \mbox{~~~~~~~~self.staticmethods=kwdict()}\\ \mbox{~~~~~~~~self.classmethods=kwdict()}\\ \mbox{~~~~~~~~self.properties=kwdict()}\\ \mbox{~~~~~~~~self.methods=kwdict()}\\ \mbox{~~~~~~~~self.data=kwdict()}\\ \mbox{~~~~~~~~for~name,~kind,~klass,~attr~in~inspect.classify{\_}class{\_}attrs(cls):}\\ \mbox{~~~~~~~~~~~~if~kind=='static~method':}\\ \mbox{~~~~~~~~~~~~~~~~self.staticmethods[name]=attr}\\ \mbox{~~~~~~~~~~~~elif~kind=='class~method':}\\ \mbox{~~~~~~~~~~~~~~~~self.classmethods[name]=attr}\\ \mbox{~~~~~~~~~~~~elif~kind=='property':}\\ \mbox{~~~~~~~~~~~~~~~~self.properties[name]=attr}\\ \mbox{~~~~~~~~~~~~elif~kind=='method':}\\ \mbox{~~~~~~~~~~~~~~~~self.methods[name]=attr}\\ \mbox{~~~~~~~~~~~~elif~kind=='data':}\\ \mbox{~~~~~~~~~~~~~~~if~not~special(name):~self.data[name]=attr}\\ \mbox{~~~~def~{\_}{\_}call{\_}{\_}(self,descr):~{\#}could~be~done~with~a~dict}\\ \mbox{~~~~~~~~if~descr==staticmethod:~return~self.staticmethods~}\\ \mbox{~~~~~~~~elif~descr==classmethod:~return~self.classmethods}\\ \mbox{~~~~~~~~elif~descr==property:~return~self.properties~}\\ \mbox{~~~~~~~~elif~descr==plainmethod:~return~self.methods}\\ \mbox{~~~~~~~~elif~descr==plaindata:~return~self.data}\\ \mbox{~~~~~~~~else:~raise~SystemExit("Invalid~descriptor")}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} With similar tricks one can 
automatically recognize cooperative methods: {\#}it is different, (better NOT to use descriptors) \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{{\#}class~Cooperative(Class):}\\ \mbox{{\#}~~~~{\_}{\_}metaclass{\_}{\_}~=~WithWrappingCapabilities}\\ \mbox{{\#}}\\ \mbox{{\#}~~~~def~cooperative(method):}\\ \mbox{{\#}~~~~~~~~~"""Calls~both~the~superclass~method~and~the~class}\\ \mbox{{\#}~~~~~~~~~method~(if~the~class~has~an~explicit~method).~}\\ \mbox{{\#}~~~~~~~~~Works~for~methods~returning~None."""}\\ \mbox{{\#}~~~~~~~~~name,cls=Cooperative.parameters~{\#}~fixed~by~the~meta-metaclass}\\ \mbox{{\#}~~~~~~~~~def~{\_}(*args,**kw):}\\ \mbox{{\#}~~~~~~~~~~~~getattr(super(cls,args[0]),name)(*args[1:],**kw)~}\\ \mbox{{\#}~~~~~~~~~~~~if~method:~method(*args,**kw)~{\#}~call~it}\\ \mbox{{\#}~~~~~~~~~return~{\_}}\\ \mbox{{\#}~~~~}\\ \mbox{{\#}~~~~cooperative=staticmethod(cooperative)}\\ \mbox{}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~wrapH(cls):}\\ \mbox{~~~~for~c~in~cls.{\_}{\_}mro{\_}{\_}[:-2]:}\\ \mbox{~~~~~~~~tracer.namespace=c.{\_}{\_}name{\_}{\_}}\\ \mbox{~~~~~~~~new=vars(c).get('{\_}{\_}new{\_}{\_}',None)}\\ \mbox{~~~~~~~~if~new:~c.{\_}{\_}new{\_}{\_}=tracedmethod(new)}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} %___________________________________________________________________________ \hypertarget{the-magic-of-metaclasses-part-i}{} \pdfbookmark[0]{THE MAGIC OF METACLASSES - PART I}{the-magic-of-metaclasses-part-i} \section*{THE MAGIC OF METACLASSES - PART I} \begin{quote} \begin{flushleft} \emph{Metaclasses~are~deeper~magic~than~99{\%}~of~users~should~ever~\\ worry~about.~~If~you~wonder~whether~you~need~them,~you~don't~\\ (the~people~who~actually~need~them~know~with~certainty~that~\\ they~need~them,~and~don't~need~an~explanation~about~why).}~\\ --Tim~Peters \end{flushleft} \end{quote} Python always had metaclasses, since they are 
inherent to its object model. However, before Python 2.2, metaclasses were tricky and their study could cause the programmer's brain to explode [\hyperlink{id42}{21}]. Nowadays, the situation has changed, and the reader should be able to understand this chapter without risk for his/her brain (however I do not give any warranty ;) Put shortly, metaclasses give the Python programmer complete control over the creation of classes. This simple statement has far-reaching consequences, since the ability to interfere with the process of class creation enables the programmer to work miracles. In this and in the following chapters, I will show some of these miracles. This chapter will focus on subtle problems of metaclasses in inheritance and multiple inheritance, including multiple inheritance of metaclasses with classes and metaclasses with metaclasses. The next chapter will focus more on applications. \begin{figure}[b]\hypertarget{id45}[23] Metaclasses in Python 1.5 [A.k.a the killer joke] \href{http://www.python.org/doc/essays/metaclasses/}{http://www.python.org/doc/essays/metaclasses/} \end{figure} There is very little documentation about metaclasses, except Guido's essays and the papers by David Mertz and myself published in IBM developerWorks: \begin{quote} \href{http://www-106.ibm.com/developerworks/library/l-pymeta.html}{http://www-106.ibm.com/developerworks/library/l-pymeta.html} \end{quote} %___________________________________________________________________________ \hypertarget{metaclasses-as-class-factories}{} \pdfbookmark[1]{Metaclasses as class factories}{metaclasses-as-class-factories} \subsection*{Metaclasses as class factories} In the Python object model (inspired by Smalltalk, which had metaclasses a quarter of a century ago!) classes themselves are objects. Now, since objects are instances of classes, that means that classes themselves can be seen as instances of special classes called \emph{metaclasses}.
Notice that things get hairy soon, since by following this idea, one could say the metaclasses themselves are classes and therefore objects; that would mean than even metaclasses can be seen as instances of special classes called meta-metaclasses. On the other hand, meta-meta-classes can be seen as instances of meta-meta-metaclasses, etc. Now, it should be obvious why metaclasses have gained such a reputation of brain-exploders ;). However, fortunately, the situation is not so bad in practice, since the infinite recursion of metaclasses is avoided because there is a metaclass that is the ''mother of all metaclasses``: the built-in metaclass \emph{type}. 'type' has the property of being its own metaclass, therefore the recursion stops. Consider for instance the following example: \begin{quote} \begin{verbatim}>>> class C(object): pass # a generic class >>> type(C) #gives the metaclass of C >>> type(type(C)) #gives the metaclass of type \end{verbatim} \end{quote} The recursion stops, since the metaclass of 'type' is 'type'. One cool consequence of classes being instances of 'type', is that since \emph{type} is a subclass of object, \begin{quote} \begin{verbatim}>>> issubclass(type,object) True \end{verbatim} \end{quote} any Python class is not only a subclass of \texttt{object}, but also an instance of 'object': \begin{quote} \begin{verbatim}>>> isinstance(C,type) True >>> isinstance(C,object) True >>> issubclass(C,object) True\end{verbatim} \end{quote} Notice that 'type' is an instance of itself (!) and therefore of 'object': \begin{quote} \begin{verbatim}>>> isinstance(type,type) # 'type' is an instance of 'type' True >>> isinstance(type,object) # therefore 'type' is an instance of 'object' True\end{verbatim} \end{quote} As it is well known, \texttt{type(X)} returns the type of \texttt{X}; however, \texttt{type} has also a second form in which it acts as a class factory. 
The form is \texttt{type(name,bases,dic)} where \texttt{name} is the name of the new class to be created, \texttt{bases} is the tuple of its bases and \texttt{dic} is the class dictionary. Let me give a few examples: \begin{quote} \begin{verbatim}>>> C=type('C',(),{}) >>> C >>> C.__name__ 'C' >>> C.__bases__ (,) >>> C.__dict__ \end{verbatim} \end{quote} Notice that since all metaclasses inherit from \texttt{type}, all metaclasses can be used as class factories. A fairy tale example will help in understanding the concept and a few subtle points on how attributes are transmitted from metaclasses to their instances. Let me start by defining a 'Nobility' metaclass: \begin{quote} \begin{verbatim}>>> class Nobility(type): attributes="Power,Richness,Beauty"\end{verbatim} \end{quote} Instances of 'Nobility' are classes such as 'Princes', 'Dukes', 'Barons', etc. \begin{quote} \begin{verbatim}>>> Prince=Nobility("Prince",(),{})\end{verbatim} \end{quote} Instances of 'Nobility' inherit its attributes, just as instances of normal classes inherit the class docstring: \begin{quote} \begin{verbatim}>>> Prince.attributes 'Power,Richness,Beauty'\end{verbatim} \end{quote} Nevertheless, 'attributes' will not be retrieved by the \texttt{dir} function: \begin{quote} \begin{verbatim}>>> print dir(Prince) ['__class__', '__delattr__', '__dict__', '__doc__', '__getattribute__', '__hash__', '__init__', '__module__', '__new__', '__reduce__', '__repr__', '__setattr__', '__str__', '__weakref__']\end{verbatim} \end{quote} However, this is a limitation of \texttt{dir}; in reality \texttt{Prince.attributes} is there. On the other hand, the situation is different for a specific 'Prince' object: \begin{quote} \begin{verbatim}>>> charles=Prince() >>> charles.attributes #error Traceback (most recent call last): File "<stdin>", line 1, in ?
AttributeError: 'Prince' object has no attribute 'attributes'\end{verbatim} \end{quote} The transmission of metaclass attributes is not transitive: instances of the metaclass inherit the attributes, but not the instances of the instances. This behavior is by design and is needed in order to avoid troubles with special methods. This point will be thoroughly explained in the last paragraph. For the moment, I may notice that the behaviour is reasonable, since the abstract qualities 'Power,Richness,Beauty' are more qualities of the 'Prince' class than of one specific representative. They can always be retrieved via the \texttt{{\_}{\_}class{\_}{\_}} attribute: \begin{quote} \begin{verbatim}>>> charles.__class__.attributes 'Power,Richness,Beauty'\end{verbatim} \end{quote} Let me now define a metaclass 'Frogginess': \begin{quote} \begin{verbatim}>>> class Frogginess(type): attributes="Powerlessness,Poverty,Uglyness"\end{verbatim} \end{quote} Instances of 'Frogginess' are classes like 'Frog', 'Toad', etc. \begin{quote} \begin{verbatim}>>> Frog=Frogginess("Frog",(),{}) >>> Frog.attributes 'Powerlessness,Poverty,Uglyness'\end{verbatim} \end{quote} However, in Python miracles can happen: \begin{quote} \begin{verbatim}>>> def miracle(Frog): Frog.__class__=Nobility >>> miracle(Frog); Frog.attributes 'Power,Richness,Beauty'\end{verbatim} \end{quote} In this example a miracle happened on the class 'Frog', by changing its (meta)class to 'Nobility'; therefore its attributes have changed accordingly. However, there is a subtle point here.
Suppose we explicitly specify the 'Frog' attributes, in such a way that they can be inherited by one of its specific representatives: \begin{quote} \begin{verbatim}>>> Frog.attributes="poor, small, ugly" >>> jack=Frog(); jack.attributes 'poor, small, ugly'\end{verbatim} \end{quote} Then the miracle cannot work: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~Nobility(type):~attributes="Power,~Richness,~Beauty"}\\ \mbox{Prince=Nobility("Prince",(),{\{}{\}})}\\ \mbox{charles=Prince()}\\ \mbox{}\\ \mbox{class~Frogginess(type):~attributes="Inpuissance,~Poverty,~Uglyness"}\\ \mbox{Frog=Frogginess("Frog",(),{\{}{\}})}\\ \mbox{Frog.attributes="poor,~small,~ugly"}\\ \mbox{jack=Frog()}\\ \mbox{}\\ \mbox{def~miracle(Frog):~Frog.{\_}{\_}class{\_}{\_}=Nobility}\\ \mbox{}\\ \mbox{miracle(Frog)}\\ \mbox{}\\ \mbox{print~"I~am",Frog.attributes,"even~if~my~class~is",Frog.{\_}{\_}class{\_}{\_}}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Output: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{I~am~poor,~small,~ugly~even~if~my~class~is~} \end{flushleft}\end{ttfamily} \end{quote} The reason is that Python first looks at the specific attributes of an object (in this case the object is the class 'Frog') and only if they are not found does it look at the attributes of its class (here the metaclass 'Nobility'). Since in this example the 'Frog' class has explicit attributes, the result is \texttt{poor, small, ugly}. If you think a bit, it makes sense. Remark: In Python 2.3 there are restrictions when changing the \texttt{{\_}{\_}class{\_}{\_}} attribute for classes: \begin{quote} \begin{verbatim}>>> C=type('C',(),{}) >>> C.__class__ = Nobility #error Traceback (most recent call last): File "<stdin>", line 1, in ? TypeError: __class__ assignment: only for heap types\end{verbatim} \end{quote} Here changing \texttt{C.{\_}{\_}class{\_}{\_}} is not allowed, since 'C' is an instance of the built-in metaclass 'type'. This restriction, i.e.
the fact that the built-in metaclass cannot be changed, has been imposed for security reasons, in order to avoid dirty tricks with the built-in classes. For instance, if it was possible to change the metaclass of the 'bool' class, we could arbitrarily change the behavior of boolean objects. This could led to abuses. Thanks to this restriction, the programmer is always sure that built-in classes behaves as documented. This is also the reason why 'bool' cannot be subclassed: \begin{quote} \begin{verbatim}>>> print bool.__doc__ # in Python 2.2 would give an error bool(x) -> bool Returns True when the argument x is true, False otherwise. The builtins True and False are the only two instances of the class bool. The class bool is a subclass of the class int, and cannot be subclassed.\end{verbatim} \end{quote} In any case, changing the class of a class is not a good idea, since it does not play well with inheritance, i.e. changing the metaclass of a base class does not change the metaclass of its children: \begin{quote} \begin{verbatim}>>> class M1(type): f=lambda cls: 'M1.f' #metaclass1 >>> class M2(type): f=lambda cls: 'M2.f' #metaclass2 >>> B=M1('B',(),{}) # B receives M1.f >>> class C(B): pass #C receives M1.f >>> B.f() 'M1.f' B.__class__=M2 #change the metaclass >>> B.f() #B receives M2.f 'M2.f' C.f() #however C does *not* receive M2.f >>> C.f() 'M1.f' >>> type(B) >>> type(C) \end{verbatim} \end{quote} %___________________________________________________________________________ \hypertarget{metaclasses-as-class-modifiers}{} \pdfbookmark[1]{Metaclasses as class modifiers}{metaclasses-as-class-modifiers} \subsection*{Metaclasses as class modifiers} The interpretation of metaclasses in terms of class factories is quite straightforward and I am sure that any Pythonista will be at home with the concept. However, metaclasses have such a reputation of black magic since their typical usage is \emph{not} as class factories, but as \emph{class modifiers}. 
This means that metaclasses are typically used to modify \emph{in fieri} classes. The trouble is that the modification can be utterly magical. Here is another fairy tale example showing the syntax (via the \texttt{{\_}{\_}metaclass{\_}{\_}} hook) and the magic of the game: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~UglyDuckling(PrettyPrinted):}\\ \mbox{~~~~"A~plain,~regular~class"}\\ \mbox{~~~~formatstring="Not~beautiful,~I~am~{\%}s"}\\ \mbox{}\\ \mbox{class~MagicallyTransformed(type):}\\ \mbox{~~~~"Metaclass~changing~the~formatstring~of~its~instances"}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(cls,*args):}\\ \mbox{~~~~~~~~cls.formatstring="Very~beautiful,~since~I~am~{\%}s"}\\ \mbox{~~~~~~~~}\\ \mbox{class~TransformedUglyDuckling(PrettyPrinted):}\\ \mbox{~~~~"A~class~metamagically~modified"}\\ \mbox{~~~~{\_}{\_}metaclass{\_}{\_}~=~MagicallyTransformed}\\ \mbox{~~~~formatstring="Not~beautiful,~I~am~{\%}s"~{\#}~will~be~changed}\\ \mbox{}\\ \mbox{{\#}}\\ \mbox{}\\ \mbox{>>>~from~oopp~import~*}\\ \mbox{>>>~print~UglyDuckling()}\\ \mbox{Not~beautiful,~I~am~} \end{flushleft}\end{ttfamily} \end{quote} In this example, even if in 'TransformedUglyDuckling' we explicitly set the formatstring to ``Not beautiful, I am {\%}s'', the metaclass changes it to ``Very beautiful, since I am {\%}s'' and thus \begin{quote} \begin{verbatim}>>> print TransformedUglyDuckling() # gives Very beautiful, since I am \end{verbatim} \end{quote} Notice that the \texttt{{\_}{\_}metaclass{\_}{\_}} hook passes to the metaclass \texttt{MagicallyTransformed} the name, bases and dictionary of the class being created, i.e. 'TransformedUglyDuckling'. Metaclasses, when used as class modifiers, act \emph{differently} from functions, when inheritance is involved. To clarify this subtle point, consider a subclass 'Swan' of 'UglyDuckling': \begin{quote} \begin{verbatim}>>> from oopp import * >>> class Swan(UglyDuckling): ... 
formatstring="Very beautiful, I am %s" >>> print Swan() Very beautiful, I am \end{verbatim} \end{quote} Now, let me define a simple function acting as a class modifier: \begin{quote} \begin{verbatim}>>> def magicallyTransform(cls): ... "Modifies the class formatstring" ... customize(cls,formatstring="Very beautiful, even if I am %s") ... return cls\end{verbatim} \end{quote} The function works: \begin{quote} \begin{verbatim}>>> magicallyTransform(UglyDuckling) >>> print UglyDuckling() Very beautiful, even if I am \end{verbatim} \end{quote} This approach is destructive, since we cannot have the original and the transformed class at the same time, and has potentially bad side effects in the derived classes. Nevertheless, in this case it works and it is not dangereous for the derived class 'Swan', since 'Swan' explicitly overrides the 'formatstring' attribute and doesn't care about the change in 'UglyDuckling.formatstring'. Therefore the output of \begin{quote} \begin{verbatim}>>> print Swan() Very beautiful, I am \end{verbatim} \end{quote} is still the same as before the action of the function \texttt{magicallyTransform}. The situation is quite different if we use the 'MagicallyTransformed' metaclass: \begin{quote} \begin{verbatim}>>> from oopp import * >>> class Swan(TransformedUglyDuckling): ... formatstring="Very beautiful, I am %s"\end{verbatim} \begin{verbatim}>>> print TransformedUglyDuckling() Very beautiful, since I am >>> print Swan() # does *not* print "Very beautiful, I am " Very beautiful, since I am \end{verbatim} \end{quote} Therefore, not only the metaclass has magically transformed the 'TransformedUglyDuckling.formatstring', it has also transformed the 'Swan.formatstring'! And that, despite the fact that 'Swan.formatstring' is explicitly set. 
The reason for this behaviour is that since 'TransformedUglyDuckling' is a base class with metaclass 'MagicallyTransformed', and since 'Swan' inherits from 'TransformedUglyDuckling', then 'Swan' inherits the metaclass 'MagicallyTransformed', which is automatically called at 'Swan' creation time. That's the reason why metaclasses are much more magical and much more dangerous than functions: functions do not override attributes in the derived classes, metaclasses do, since they are automagically called at the time of creation of the subclass. In other words, functions are explicit, metaclasses are implicit. Nevertheless, this behavior can be pretty useful in many circumstances, and it is a feature, not a bug. In the situations where this behavior is not intended, one should use a function, not a metaclass. In general, metaclasses are better than functions, since metaclasses are classes and as such they can inherit from one another. This means that one can improve a basic metaclass through (multiple) inheritance, with \emph{reuse} of code.
%___________________________________________________________________________
\hypertarget{a-few-caveats-about-the-usage-of-metaclasses}{} \pdfbookmark[1]{A few caveats about the usage of metaclasses}{a-few-caveats-about-the-usage-of-metaclasses} \subsection*{A few caveats about the usage of metaclasses} Let me start with some caveats about the \texttt{{\_}{\_}metaclass{\_}{\_}} hook, which is commonly used and quite powerful, but also quite dangerous. Let's imagine a programmer not knowing about metaclasses and looking at the 'TransformedUglyDuckling' code (assuming there are no comments): she would probably think that ''{\_}{\_}metaclass{\_}{\_}`` is some special attribute used for introspection purposes only, with no other effects, and she would probably expect the output of the script to be ''Not much, I am the class TransformedUglyDuckling`` whereas it is exactly the contrary!
In other words, when metaclasses are involved, \emph{what you see, is not what you get}. The situation is even more implicit when the metaclass is inherited from some base class, therefore lacking also the visual clue of the hook. For these reasons, metaclasses are something to be used with great care; they can easily make your code unreadable and confuse inexpert programmers. Moreover, it is more difficult to debug programs involving metaclasses, since methods are magically transformed by routines defined in the metaclass, and the code you see in the class is \emph{not} what Python sees. I think the least confusing way of using metaclasses is to concentrate all the dynamics on them and to write empty classes except for the metaclass hook. If you write a class with no methods such as \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{class~TransformedUglyDuckling(object):}\\ \mbox{~~~~{\_}{\_}metaclass{\_}{\_}=MagicallyTransformed} \end{flushleft}\end{ttfamily} \end{quote} then the only place to look is the metaclass. I have found it extremely confusing to have some of the methods defined in the class and some in the metaclass, especially during debugging. Another point to make is that the \texttt{{\_}{\_}metaclass{\_}{\_}} hook should not be used to modify pre-existing classes, since it requires modifying the source code (even if it is enough to change one line only). Moreover, it is confusing, since adding a \texttt{{\_}{\_}metaclass{\_}{\_}} attribute \emph{after} the class creation would not do the job: \begin{quote} \begin{verbatim}>>> from oopp import UglyDuckling, MagicallyTransformed >>> UglyDuckling.__metaclass__=MagicallyTransformed >>> print UglyDuckling() "Not much, I am the class UglyDuckling"\end{verbatim} \end{quote} The reason is that we have to think of UglyDuckling as an instance of \texttt{type}, the built-in metaclass; merely adding a \texttt{{\_}{\_}metaclass{\_}{\_}} attribute does not re-initialize the class.
The problem is elegantly solved by avoiding the hook and creating an enhanced copy of the original class through \texttt{MagicallyTransformed} used as a class factory. \begin{quote} \begin{verbatim}>>> name=UglyDuckling.__name__ >>> bases=UglyDuckling.__bases__ >>> dic=UglyDuckling.__dict__.copy() >>> UglyDuckling=MagicallyTransformed(name,bases,dic)\end{verbatim} \end{quote} Notice that I have recreated 'UglyDuckling', giving the new class the old identifier. \begin{quote} \begin{verbatim}>>> print UglyDuckling() Very beautiful, since I am >\end{verbatim} \end{quote} The metaclass of this new 'UglyDuckling' has been specified and will accompany all future children of 'UglyDuckling': \begin{quote} \begin{verbatim}>>> class Swan(UglyDuckling): pass ... >>> type(Swan) \end{verbatim} \end{quote} Another caveat is in the overriding of \texttt{{\_}{\_}init{\_}{\_}} in the metaclass. This is quite common in the case of metaclasses called through the \texttt{{\_}{\_}metaclass{\_}{\_}} hook mechanism, since in this case the class has been already defined (if not created) in the class statement, and we are interested in initializing it, more than in recreating it (which is still possible, by the way). The problem is that overriding \texttt{{\_}{\_}init{\_}{\_}} has severe limitations with respect to overriding \texttt{{\_}{\_}new{\_}{\_}}, since the 'name', 'bases' and 'dic' arguments cannot be directly changed.
Let me show an example: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{from~oopp~import~*}\\ \mbox{}\\ \mbox{class~M(type):}\\ \mbox{~~~~"Shows~that~dic~cannot~be~modified~in~{\_}{\_}init{\_}{\_},~only~in~{\_}{\_}new{\_}{\_}"}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(cls,name,bases,dic):}\\ \mbox{~~~~~~~~name='C~name~cannot~be~changed~in~{\_}{\_}init{\_}{\_}'}\\ \mbox{~~~~~~~~bases='cannot~be~changed'}\\ \mbox{~~~~~~~~dic['changed']=True}\\ \mbox{}\\ \mbox{class~C(object):}\\ \mbox{~~~~{\_}{\_}metaclass{\_}{\_}=M}\\ \mbox{~~~~changed=False}\\ \mbox{}\\ \mbox{print~C.{\_}{\_}name{\_}{\_}~~{\#}~=>~C}\\ \mbox{print~C.{\_}{\_}bases{\_}{\_}~{\#}~=>~(,)}\\ \mbox{print~C.changed~~~{\#}~=>~False}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} The output of this script is \texttt{False}: the dictionary cannot be changed in \texttt{{\_}{\_}init{\_}{\_}} method. However, replacing \texttt{dic['changed']=True} with \texttt{cls.changed=True} would work. Analougously, changing \texttt{cls.{\_}{\_}name{\_}{\_}} would work. On the other hand, \texttt{{\_}{\_}bases{\_}{\_}} is a read-only attribute and cannot be changed once the class has been created, therefore there is no way it can be touched in \texttt{{\_}{\_}init{\_}{\_}}. However, \texttt{{\_}{\_}bases{\_}{\_}} could be changed in \texttt{{\_}{\_}new{\_}{\_}} before the class creation. %___________________________________________________________________________ \hypertarget{metaclasses-and-inheritance}{} \pdfbookmark[1]{Metaclasses and inheritance}{metaclasses-and-inheritance} \subsection*{Metaclasses and inheritance} It is easy to get confused about the difference between a metaclass and a mix-in class in multiple inheritance, since both are denoted by adjectives and both share the same idea of enhancing a hierarchy. Moreover, both mix-in classes and metaclasses can be inherited in the whole hierarchy. 
Nevertheless, they behave differently and there are various subtle points to emphasize. We have already noticed in the first section that attributes of a metaclass are transmitted to its instances, but not to the instances of the instances, whereas the normal inheritance is transitive: the grandfather transmits its attributes to the children and to the grandchildren too. The difference can be represented with the following picture, where 'M' is the metaclass, 'B' a base class, 'C' a child of 'B' and c an instance of 'C': \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{M~(attr)~~~~~~~~~B~(attr)~~~}\\ \mbox{:~~~~~~~~~~~~~~~~|}\\ \mbox{C~(attr)~~~~~~~~~C~(attr)~~~~}\\ \mbox{:~~~~~~~~~~~~~~~~:}\\ \mbox{c~()~~~~~~~~~~~~~c~(attr)~~~~} \end{flushleft}\end{ttfamily} \end{quote} Notice that here the relation of instantiation is denoted by a dotted line. This picture is valid when C has metaclass M but not base class, or when C has base class but not metaclass. However, what happens when the class C has both a metaclass M and a base class B ? \begin{quote} \begin{verbatim}>>> class M(type): a='M.a' >>> class B(object): a='B.a' >>> class C(B): __metaclass__=M >>> c=C()\end{verbatim} \end{quote} The situation can be represented by the following graph, \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{(M.a)~~~M~~~B~~(B.a)}\\ \mbox{~~~~~~~~:~~/}\\ \mbox{~~~~~~~~:~/}\\ \mbox{~~~(?)~~C}\\ \mbox{~~~~~~~~:}\\ \mbox{~~~~~~~~:}\\ \mbox{~~~(?)~~c} \end{flushleft}\end{ttfamily} \end{quote} Here the metaclass M and the base class B are fighting one against the other. Who wins ? C should inherit the attribute 'B.a' from its base B, however, the metaclass would like to induce an attribute 'M.a'.
The answer is that the inheritance constraint wins over the metaclass constraint: \begin{quote} \begin{verbatim}>>> C.a 'B.a' >>> c.a 'B.a'\end{verbatim} \end{quote} The reason is the same one we discussed in the fairy tale example: 'M.a' is an attribute of the metaclass, if its instance C already has a specified attribute C.a (in this case specified through inheritance from B), then the attribute is not modified. However, one could \emph{force} the modification: \begin{quote} \begin{verbatim}>>> class M(type): ... def __init__(cls,*args): cls.a='M.a' >>> class C(B): __metaclass__=M >>> C.a 'M.a'\end{verbatim} \end{quote} In this case the metaclass M would win over the base class B. Actually, this is not surprising, since it is explicit. What could be surprising, had we not explained why inheritance silently wins, is that \begin{quote} \begin{verbatim}>>> c.a 'B.a'\end{verbatim} \end{quote} This explains the behaviour for special methods like \texttt{{\_}{\_}new{\_}{\_},{\_}{\_}init{\_}{\_},{\_}{\_}str{\_}{\_}}, etc. which are defined both in the class and the metaclass with the same name (in both cases, they are inherited from \texttt{object}). In the chapter on objects, we learned that the printed representation of an object can be modified by overriding the \texttt{{\_}{\_}str{\_}{\_}} method of its class. In the same sense, the printed representation of a class can be modified by overriding the \texttt{{\_}{\_}str{\_}{\_}} method of its metaclass.
Let me show an example: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~Printable(PrettyPrinted,type):}\\ \mbox{~~~"""Apparently~does~nothing,~but~actually~makes~PrettyPrinted~acting~as}\\ \mbox{~~~~~~a~metaclass."""}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Instances of 'Printable' are classes with a nice printable representation: \begin{quote} \begin{verbatim}>>> from oopp import Printable >>> C=Printable('Classname',(),{}) >>> print C Classname\end{verbatim} \end{quote} However, the internal string representation stays the same: \begin{quote} \begin{verbatim}>>> C # invokes Printable.__repr__ \end{verbatim} \end{quote} Notice that the name of class 'C' is \texttt{Classname} and not 'C' ! Consider for instance the following code: \begin{quote} \begin{verbatim}>>> class M(type): ... def __str__(cls): ... return cls.__name__ ... def method(cls): ... return cls.__name__ ... >>> class C(object): ... __metaclass__=M >>> c=C()\end{verbatim} \end{quote} In this case the \texttt{{\_}{\_}str{\_}{\_}} method in \texttt{M} cannot override the \texttt{{\_}{\_}str{\_}{\_}} method in C, which is inherited from \texttt{object}. Moreover, if you experiment a little, you will see that \begin{quote} \begin{verbatim}>>> print C # is equivalent to print M.__str__(C) C >>> print c # is equivalent to print C.__str__(c) <__main__.C object at 0x8158f54>\end{verbatim} \end{quote} The first \texttt{{\_}{\_}str{\_}{\_}} is ''attached`` to the metaclass and the second to the class. Consider now the standard method ''method``. It is both attached to the metaclass \begin{quote} \begin{verbatim}>>> print M.method(C) C\end{verbatim} \end{quote} and to the class \begin{quote} \begin{verbatim}>>> print C.method() #in a sense, this is a class method, i.e. it receives C #the class as first argument\end{verbatim} \end{quote} Actually it can be seen as a class method of 'C' (cfr. 
Guido van Rossum ''Unifying types and classes in Python 2.2``. When he discusses classmethods he says: \emph{''Python also has real metaclasses, and perhaps methods defined in a metaclass have more right to the name ``class method''; but I expect that most programmers won't be using metaclasses``}). Actually, this is the SmallTalk terminology, Unfortunately, in Python the word \texttt{classmethod} denotes an attribute descriptor, therefore it is better to call the methods defined in a metaclass \emph{metamethods}, in order to avoid any possible confusion. The difference between \texttt{method} and \texttt{{\_}{\_}str{\_}{\_}} is that you cannot use the syntax \begin{quote} \begin{verbatim}>>> print C.__str__() #error TypeError: descriptor '__str__' of 'object' object needs an argument\end{verbatim} \end{quote} because of the confusion with the other {\_}{\_}str{\_}{\_}; you can only use the syntax \begin{quote} \begin{verbatim}>>> print M.__str__(C)\end{verbatim} \end{quote} Suppose now I change C's definition by adding a method called ''method``: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{class~C(object):}\\ \mbox{~~~~{\_}{\_}metaclass{\_}{\_}=M}\\ \mbox{~~~~def~{\_}{\_}str{\_}{\_}(self):}\\ \mbox{~~~~~~~~return~"instance~of~{\%}s"~{\%}~self.{\_}{\_}class{\_}{\_}}\\ \mbox{~~~~def~method(self):}\\ \mbox{~~~~~~~~return~"instance~of~{\%}s"~{\%}~self.{\_}{\_}class{\_}{\_}} \end{flushleft}\end{ttfamily} \end{quote} If I do so, then there is name clashing and the previously working statement print C.method() gives now an error: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{Traceback~(most~recent~call~last):}\\ \mbox{~~File~"",~line~24,~in~?}\\ \mbox{TypeError:~unbound~method~method()~must~be~called~with~C~instance~as}\\ \mbox{first~argument~(got~nothing~instead)} \end{flushleft}\end{ttfamily} \end{quote} Conclusion: \texttt{{\_}{\_}str{\_}{\_}, {\_}{\_}new{\_}{\_}, {\_}{\_}init{\_}{\_}} etc. 
defined in the metaclass have name clashing with the standard methods defined in the class, therefore they must be invoked with the extended syntax (ex. \texttt{M.{\_}{\_}str{\_}{\_}(C)}), whereas normal methods in the metaclass with no name clashing with the methods of the class can be used as class methods (ex. \texttt{C.method()} instead of \texttt{M.method(C)}). Metaclass methods are always bound to the metaclass; they bind to the class (receiving the class as first argument) only if there is no name clash with methods already defined in the class. Such a clash does occur for \texttt{{\_}{\_}str{\_}{\_}}, \texttt{{\_}{\_}init{\_}{\_}}, etc.
%___________________________________________________________________________
\hypertarget{conflicting-metaclasses}{} \pdfbookmark[1]{Conflicting metaclasses}{conflicting-metaclasses} \subsection*{Conflicting metaclasses} Consider a class 'A' with metaclass 'M{\_}A' and a class 'B' with metaclass 'M{\_}B'; suppose I derive 'C' from 'A' and 'B'. The question is: what is the metaclass of 'C' ? Is it 'M{\_}A' or 'M{\_}B' ? The correct answer (see ''Putting metaclasses to work`` for a thorough discussion) is 'M{\_}C', where 'M{\_}C' is a metaclass that inherits from 'M{\_}A' and 'M{\_}B', as in the following graph: \begin{quote} \begin{figure} \includegraphics{fig1.ps} \end{figure} \end{quote} However, Python is not yet that magical, and it does not automatically create 'M{\_}C'. Instead, it will raise a \texttt{TypeError}, warning the programmer of the possible confusion: \begin{quote} \begin{verbatim}>>> class M_A(type): pass >>> class M_B(type): pass >>> A=M_A('A',(),{}) >>> B=M_B('B',(),{}) >>> class C(A,B): pass #error Traceback (most recent call last): File "", line 1, in ? TypeError: metatype conflict among bases\end{verbatim} \end{quote} This is an example where the metaclasses 'M{\_}A' and 'M{\_}B' fight each other to generate 'C' instead of cooperating.
The metatype conflict can be avoided by assegning the correct metaclass to 'C' by hand: \begin{quote} \begin{verbatim}>>> class C(A,B): __metaclass__=type("M_AM_B",(M_A,M_B),{}) >>> type(C) \end{verbatim} \end{quote} In general, a class A(B, C, D , ...) can be generated without conflicts only if type(A) is a subclass of each of type(B), type(C), ... In order to avoid conflicts, the following function, that generates the correct metaclass by looking at the metaclasses of the base classes, is handy: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{metadic={\{}{\}}}\\ \mbox{}\\ \mbox{def~{\_}generatemetaclass(bases,metas,priority):}\\ \mbox{~~~~trivial=lambda~m:~sum([issubclass(M,m)~for~M~in~metas],m~is~type)}\\ \mbox{~~~~{\#}~hackish!!~m~is~trivial~if~it~is~'type'~or,~in~the~case~explicit}\\ \mbox{~~~~{\#}~metaclasses~are~given,~if~it~is~a~superclass~of~at~least~one~of~them}\\ \mbox{~~~~metabs=tuple([mb~for~mb~in~map(type,bases)~if~not~trivial(mb)])}\\ \mbox{~~~~metabases=(metabs+metas,~metas+metabs)[priority]}\\ \mbox{~~~~if~metabases~in~metadic:~{\#}~already~generated~metaclass}\\ \mbox{~~~~~~~~return~metadic[metabases]}\\ \mbox{~~~~elif~not~metabases:~{\#}~trivial~metabase}\\ \mbox{~~~~~~~~meta=type~}\\ \mbox{~~~~elif~len(metabases)==1:~{\#}~single~metabase}\\ \mbox{~~~~~~~~meta=metabases[0]}\\ \mbox{~~~~else:~{\#}~multiple~metabases}\\ \mbox{~~~~~~~~metaname="{\_}"+''.join([m.{\_}{\_}name{\_}{\_}~for~m~in~metabases])}\\ \mbox{~~~~~~~~meta=makecls()(metaname,metabases,{\{}{\}})}\\ \mbox{~~~~return~metadic.setdefault(metabases,meta)}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} This function is particularly smart since: \begin{quote} \newcounter{listcnt29} \begin{list}{\arabic{listcnt29}.} { \usecounter{listcnt29} \setlength{\rightmargin}{\leftmargin} } \item {} Avoid duplications .. \item {} Remember its results. 
\end{list} \end{quote} We may generate the child of a tuple of base classes with a given metaclass while avoiding metatype conflicts thanks to the following \texttt{makecls} function: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~makecls(*metas,**options):}\\ \mbox{~~~~"""Class~factory~avoiding~metatype~conflicts.~The~invocation~syntax~is}\\ \mbox{~~~~makecls(M1,M2,..,priority=1)(name,bases,dic).~If~the~base~classes~have~}\\ \mbox{~~~~metaclasses~conflicting~within~themselves~or~with~the~given~metaclasses,}\\ \mbox{~~~~it~automatically~generates~a~compatible~metaclass~and~instantiate~it.~}\\ \mbox{~~~~If~priority~is~True,~the~given~metaclasses~have~priority~over~the~}\\ \mbox{~~~~bases'~metaclasses"""}\\ \mbox{}\\ \mbox{~~~~priority=options.get('priority',False)~{\#}~default,~no~priority}\\ \mbox{~~~~return~lambda~n,b,d:~{\_}generatemetaclass(b,metas,priority)(n,b,d)}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Here is an example of usage: \begin{quote} \begin{verbatim}>>> class C(A,B): __metaclass__=makecls() >>> print C,type(C) \end{verbatim} \end{quote} Notice that the automatically generated metaclass does not pollute the namespace: \begin{quote} \begin{verbatim}>>> _M_A_M_B #error Traceback (most recent call last): File "", line 1, in ? NameError: name '_M_A_M_B' is not defined\end{verbatim} \end{quote} It can only be accessed as \texttt{type(C)}. In short, the \texttt{makecls} function allows one to generate a child from bases enhanced by different custom metaclasses, by generating under the hood a compatible metaclass via multiple inheritance from the original metaclasses. However, this logic can only work if the original metaclasses are cooperative, i.e. their methods are written in such a way to avoid collisions. This can be done by using the cooperative \texttt{super} call mechanism discussed in chapter 4.
%___________________________________________________________________________
\hypertarget{cooperative-metaclasses}{} \pdfbookmark[1]{Cooperative metaclasses}{cooperative-metaclasses} \subsection*{Cooperative metaclasses} In this section I will discuss how metaclasses can be composed with classes and with metaclasses, too. Since we will discuss even complicated hierarchies, it is convenient to have a utility routine printing the MRO of a given class: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~MRO(cls):}\\ \mbox{~~~~count=0;~out=[]}\\ \mbox{~~~~print~"MRO~of~{\%}s:"~{\%}~cls.{\_}{\_}name{\_}{\_}}\\ \mbox{~~~~for~c~in~cls.{\_}{\_}mro{\_}{\_}:}\\ \mbox{~~~~~~~~name=c.{\_}{\_}name{\_}{\_}}\\ \mbox{~~~~~~~~bases=','.join([b.{\_}{\_}name{\_}{\_}~for~b~in~c.{\_}{\_}bases{\_}{\_}])}\\ \mbox{~~~~~~~~s="~~{\%}s~-~{\%}s({\%}s)"~{\%}~(count,name,bases)}\\ \mbox{~~~~~~~~if~type(c)~is~not~type:~s+="[{\%}s]"~{\%}~type(c).{\_}{\_}name{\_}{\_}}\\ \mbox{~~~~~~~~out.append(s);~count+=1}\\ \mbox{~~~~return~'{\textbackslash}n'.join(out)}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Notice that \texttt{MRO} also prints the metaclass' name in square brackets, for classes enhanced by a non-trivial metaclass. Consider for instance the following hierarchy: \begin{quote} \begin{verbatim}>>> from oopp import MRO >>> class B(object): pass >>> class M(B,type): pass >>> class C(B): __metaclass__=M\end{verbatim} \end{quote} Here 'M' is a metaclass that inherits from 'type' and the base class 'B' and 'C' is both an instance of 'M' and a child of 'B'.
The inheritance graph can be drawn as \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{~object}\\ \mbox{~/~~~{\textbackslash}}\\ \mbox{B~~~~type}\\ \mbox{|~{\textbackslash}~~/}\\ \mbox{|~~M}\\ \mbox{{\textbackslash}~~:}\\ \mbox{~{\textbackslash}~:~~~~~}\\ \mbox{~~~C} \end{flushleft}\end{ttfamily} \end{quote} Suppose now we want to retrieve the \texttt{{\_}{\_}new{\_}{\_}} method of B's superclass with respect to the MRO of C: obviously, this is \texttt{object.{\_}{\_}new{\_}{\_}}, since \begin{quote} \begin{verbatim}>>> print MRO(C) MRO of C: 0 - C(B)[M] 1 - B(object) 2 - object()\end{verbatim} \end{quote} This allows us to create an instance of 'C' in this way: \begin{quote} \begin{verbatim}>>> super(B,C).__new__(C) <__main__.C object at 0x4018750c>\end{verbatim} \end{quote} It is interesting to notice that this would not work in Python 2.2, due to a bug in the implementation of \texttt{super}, therefore do not try this trick with older versions of Python. Notice that everything works only because \texttt{B} inherits the \texttt{object.{\_}{\_}new{\_}{\_}} staticmethod that is cooperative and it turns out that it calls \texttt{type.{\_}{\_}new{\_}{\_}}. However, if I give to 'B' a non-cooperative method \begin{quote} \begin{verbatim}>>> B.__new__=staticmethod(lambda cls,*args: object.__new__(cls))\end{verbatim} \end{quote} things do not work: \begin{quote} \begin{verbatim}>>> M('D',(),{}) #error Traceback (most recent call last): File "", line 1, in ? 
File "", line 1, in TypeError: object.__new__(M) is not safe, use type.__new__()\end{verbatim} \end{quote} A cooperative method would solve the problem: \begin{quote} \begin{verbatim}>>> B.__new__=staticmethod(lambda m,*args: super(B,m).__new__(m,*args)) >>> M('D',(),{}) # calls B.__new__(M,'D',(),{}) \end{verbatim} \end{quote}
%___________________________________________________________________________
\hypertarget{metamethods-vs-class-methods}{} \pdfbookmark[1]{Metamethods vs class methods}{metamethods-vs-class-methods} \subsection*{Metamethods vs class methods} Metamethods are methods defined in a metaclass. Python already has a few built-in metamethods: \texttt{.mro()} and \texttt{{\_}{\_}subclasses{\_}{\_}}. These are methods of the metaclass 'type' and therefore of any of its sub-metaclasses. \begin{quote} \begin{verbatim}>>> dir(type) ['__base__', '__bases__', '__basicsize__', '__call__', '__class__', '__cmp__', '__delattr__', '__dict__', '__dictoffset__', '__doc__', '__flags__', '__getattribute__', '__hash__', '__init__', '__itemsize__', '__module__', '__mro__', '__name__', '__new__', '__reduce__', '__repr__', '__setattr__', '__str__', '__subclasses__', '__weakrefoffset__', 'mro']\end{verbatim} \begin{verbatim}>>> print type.mro.__doc__ mro() -> list return a type's method resolution order >>> print type.__subclasses__.__doc__ __subclasses__() -> list of immediate subclasses\end{verbatim} \begin{verbatim}>>> class A(object): pass >>> class B(A): pass >>> B.mro() [, , ] >>> A.__subclasses__() []\end{verbatim} \end{quote} Notice that \texttt{mro()} and \texttt{{\_}{\_}subclasses{\_}{\_}} are not retrieved by \texttt{dir}. Let me contrast metamethods with the more traditional classmethods. In many senses, the two concepts are akin: \begin{quote} \begin{verbatim}>>> class M(type): ... "Metaclass with a (meta)method mm" ... 
def mm(cls): return cls >>> D=M('D',(),{'cm':classmethod(lambda cls: cls)}) >>> # instance of M with a classmethod cm >>> D.mm # the metamethod > >>> D.cm # the classmethod >\end{verbatim} \end{quote} Notice the similarities between the classmethod and the metamethod: \begin{quote} \begin{verbatim}>>> D.mm.im_self, D.cm.im_self # the same (, ) >>> D.mm.im_class, D.cm.im_class # still the same (, )\end{verbatim} \end{quote} There are no surprises for \texttt{im{\_}func}: \begin{quote} \begin{verbatim}>>> D.mm.im_func, D.cm.im_func (, at 0x402c280c>)\end{verbatim} \end{quote} Nevertheless, there are differences: metamethods are not bound to instances of the class \begin{quote} \begin{verbatim}>>> D().cm() # the classmethod works fine >>> D().mm() # the metamethod does not: error Traceback (most recent call last): File "", line 1, in ? AttributeError: 'D' object has no attribute 'mm'\end{verbatim} \end{quote} and they are not retrieved by \texttt{dir}: \begin{quote} \begin{verbatim}>>> from oopp import * >>> attributes(D).keys() # mm is not retrieved, only cm ['cm']\end{verbatim} \begin{verbatim}>>> cm.__get__('whatever') #under Python 2.2.0 would give a serious error Segmentation fault >>> cm.__get__(None) #under Python 2.3 there is no error of >\end{verbatim} \end{quote} Moreover metamethods behave differently with respect to multiple inheritance. If a class A defines a classmethod cA and a class B defines a classmethod cB, then the class C(A,B) inherits both the classmethods cA and cB. In the case of metamethods defined in M{\_}A and M{\_}B, the same is true only if one resolves the meta-type conflict by hand, by generating the metaclass M{\_}C(M{\_}A,M{\_}B). In this sense, classmethods are simpler to use than metamethods.
%___________________________________________________________________________
\hypertarget{the-magic-of-metaclasses-part-2}{} \pdfbookmark[0]{THE MAGIC OF METACLASSES - PART 2}{the-magic-of-metaclasses-part-2} \section*{THE MAGIC OF METACLASSES - PART 2} Metaclasses are so powerful that a single chapter is not enough to do justice to them ;) In this second chapter on metaclasses I will unravel their deepest secrets, covering topics such as meta-metaclasses, anonymous inner metaclasses, global metaclasses and advanced class factories. Moreover, I will give various magical applications of metaclasses, in the realm of enhancing the Python language itself. Actually, this is probably the most idiomatic application of metaclasses (Guido's examples on the metaclass usage are all in this area). I will show how metaclasses can be used to enhance the \texttt{super} cooperative call mechanism. This is not a chapter for the faint of heart.
%___________________________________________________________________________
\hypertarget{the-secrets-of-the-metaclass-hook}{} \pdfbookmark[1]{The secrets of the \_\_metaclass\_\_ hook}{the-secrets-of-the-metaclass-hook} \subsection*{The secrets of the \texttt{{\_}{\_}metaclass{\_}{\_}} hook} In the previous chapter we have seen how the \texttt{{\_}{\_}metaclass{\_}{\_}} hook can be used as a way of metaclass-enhancing pre-existing classes with a minimal change of the source code. But it has much deeper secrets. The first and simplest of them is the fact that the hook can also be defined at the module level, \emph{outside} the class. This allows a number of neat tricks, since in the presence of a \texttt{{\_}{\_}metaclass{\_}{\_}} hook at the module level \emph{all} the old style classes in the module (including nested ones!) acquire that hook. A first application is to rejuvenate old style classes to new style classes.
I remind that old style classes are retained for compatibility with old code, but they are a pain in the back, if you want to use features intended for new style classes only (for instance properties etc.). Naively, one would expect the conversion from old style classes to new style to be long and error prone: suppose you have a very large application with hundreds of old style classes defined in dozens of modules. Suppose you want to update your application to Python 2.2+ classes in order to take advantage of the new features I have discussed extensively in this book: the naive way to go would be to go through the source, look for all class definitions and change \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{Classname:~-->~Classname(object)} \end{flushleft}\end{ttfamily} \end{quote} One could solve this problem with a regular expression search and replace in all modules, but this would require changing \emph{all} the source. This is against the spirit of OOP: we must \emph{reuse} old code. Metaclasses are particularly handy to solve this problem: actually it is enough to add to your modules the following line as first line: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\_}{\_}metaclass{\_}{\_}~=~type} \end{flushleft}\end{ttfamily} \end{quote} Then, all your old style classes will have 'type' as their metaclass: this is akin to saying that all the old style classes are \emph{automagically} rejuvenated to new style classes! And this also works for \emph{nested} classes!!
\begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{{\_}{\_}metaclass{\_}{\_}~=~type~{\#}~this~rejuvanate~all~the~class~in~the~module}\\ \mbox{}\\ \mbox{class~C:}\\ \mbox{~~~class~D:~pass}\\ \mbox{}\\ \mbox{print~dir(C)~~~{\#}~both~C~and~C.D}\\ \mbox{print~dir(C.D)~{\#}~are~now~new~style~classes}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} This very first example adds substance (if needed) to the widespread belief that metaclasses have a well deserved reputation of magic. The explanation is that defining a global metaclass called \texttt{{\_}{\_}metaclass{\_}{\_}} automatically makes all old style classes (new style classes simply ignore the existence of the global \texttt{{\_}{\_}metaclass{\_}{\_}}) defined in your module instances of the given metaclass; this automatically converts them to new style classes.
%___________________________________________________________________________
\hypertarget{anonymous-inner-metaclasses}{} \pdfbookmark[1]{Anonymous inner metaclasses}{anonymous-inner-metaclasses} \subsection*{Anonymous inner metaclasses} A second, deeper secret of the \texttt{{\_}{\_}metaclass{\_}{\_}} hook is that it can be used to define anonymous \emph{inner metaclasses}.
The following example explain what I mean: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~totuple(arg):}\\ \mbox{~~~~"Converts~the~argument~to~a~tuple,~if~need~there~is"}\\ \mbox{~~~~if~isinstance(arg,tuple):~return~arg~{\#}~do~nothing}\\ \mbox{~~~~else:~return~(arg,)~{\#}~convert~to~tuple}\\ \mbox{}\\ \mbox{class~BracketCallable(object):}\\ \mbox{~~~~"""Any~subclass~C(BracketCallable)~can~be~called~with~the~syntax~C[t],~}\\ \mbox{~~~~where~t~is~a~tuple~of~arguments~stored~in~bracket{\_}args;~~returns~the~}\\ \mbox{~~~~class~or~an~instance~of~it,~depending~on~the~flag~'returnclass'."""}\\ \mbox{}\\ \mbox{~~~~returnclass=True}\\ \mbox{~~~~class~{\_}{\_}metaclass{\_}{\_}(type):~{\#}~anonymous~inner~metaclass}\\ \mbox{~~~~~~~~def~{\_}{\_}getitem{\_}{\_}(cls,args):~{\#}~non~cooperative~metamethod}\\ \mbox{~~~~~~~~~~~~if~cls.returnclass:~}\\ \mbox{~~~~~~~~~~~~~~~~c=type(cls.{\_}{\_}name{\_}{\_},(cls,),{\{}'bracket{\_}args':totuple(args){\}})}\\ \mbox{~~~~~~~~~~~~~~~~return~c~{\#}~a~customized~copy~of~the~original~class}\\ \mbox{~~~~~~~~~~~~else:}\\ \mbox{~~~~~~~~~~~~~~~~self=cls();~self.bracket{\_}args=totuple(args)}\\ \mbox{~~~~~~~~~~~~~~~~return~self}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} In this code 'BracketCallable.{\_}{\_}metaclass{\_}{\_}' is the anonymous (actually it has a special name, \texttt{{\_}{\_}metaclass{\_}{\_}}) inner metaclass of 'BracketCallable'. The effect of 'BracketCallable.{\_}{\_}metaclass{\_}{\_}' is the following: it makes 'BracketCallable' and its descendants callable with brackets. Since the 'returnclass' flag is set, \texttt{{\_}{\_}getitem{\_}{\_}} returns the class with an attribute 'bracket{\_}args' containing the tuple of the passed arguments (otherwise it returns an instance of the class). This works since when Python encounters an expression of kind \texttt{cls[arg]} it interprets it as \texttt{type(cls).{\_}{\_}getitem{\_}{\_}(cls,arg)}. 
Therefore, if \texttt{cls} is a subclass of 'BracketCallable', this means that \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{cls[arg]~<=>~BracketCallable.{\_}{\_}metaclass{\_}{\_}.{\_}{\_}getitem{\_}{\_}(cls,arg)} \end{flushleft}\end{ttfamily} \end{quote} Let me give a few examples: \begin{quote} \begin{verbatim}>>> from oopp import BracketCallable >>> type(BracketCallable) >>> print type(BracketCallable).__name__ # not really anonymous __metaclass__ >>> print BracketCallable['a1'].bracket_args ('a1',) >>> print BracketCallable['a1','a2'].bracket_args ('a1', 'a2')\end{verbatim} \end{quote} This syntactical feature is an example of a thing that can be done \emph{through metaclasses only}: it cannot be emulated by functions. Anonymous inner metaclasses are the least verbose manner of defining metamethods. Moreover, they are a neat trick to define mix-in classes that, when inherited, can metamagically enhance an entire multiple inheritance hierarchy. In the previous example \texttt{{\_}{\_}getitem{\_}{\_}} is noncooperative, but nothing forbids anonymous inner metaclasses from being made cooperative. However, there is some subtlety one must be aware of. Let me give an example. My 'WithCounter' class counts how many instances of 'WithCounter' and its subclasses are generated. However, it does not distinguish between different subclasses. This was correct in the pizza shop example, since only the total number of produced pizzas mattered; however, in other situations, one may want to reset the counter each time a new subclass is created. 
This can be done automagically by a cooperative inner metaclass: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{class~WithMultiCounter(WithCounter):}\\ \mbox{~~~~"""Each~time~a~new~subclass~is~derived,~the~counter~is~reset"""}\\ \mbox{~~~~class~{\_}{\_}metaclass{\_}{\_}(type):}\\ \mbox{~~~~~~~~def~{\_}{\_}init{\_}{\_}(cls,*args):}\\ \mbox{~~~~~~~~~~~~cls.counter=0}\\ \mbox{~~~~~~~~~~~~super(cls.{\_}{\_}this,cls).{\_}{\_}init{\_}{\_}(*args)}\\ \mbox{~~~~reflective({\_}{\_}metaclass{\_}{\_})} \end{flushleft}\end{ttfamily} \end{quote} Notice that the order of execution of this code is subtle: \newcounter{listcnt30} \begin{list}{\arabic{listcnt30})} { \usecounter{listcnt30} \setlength{\rightmargin}{\leftmargin} } \item {} first, the fact that WithMulticounter has a non-trivial metaclass is registered, but nothing else is done; \item {} then, the line \texttt{reflective({\_}{\_}metaclass{\_}{\_})} is executed: this means that the inner metaclass (and therefore its instances) get an attribute \texttt{.{\_}metaclass{\_}{\_}this} containing a reference to the inner metaclass; \item {} then, the outer class is passed to its inner metaclass and created by the inherited metaclass' \texttt{{\_}{\_}new{\_}{\_}} method; \item {} at this point \texttt{cls} exists and \texttt{cls.{\_}{\_}this} is inherited from \texttt{{\_}{\_}metaclass{\_}{\_}.{\_}metaclass{\_}{\_}this}; this means that the expression \texttt{super(cls.{\_}{\_}this,cls).{\_}{\_}init{\_}{\_}(*args)} is correctly recognized and 'WithMultiCounter' can be initialized; \item {} only after that, the name 'WithMultiCounter' enters in the global namespace and can be recognized. \end{list} Notice in particular that inside \texttt{super}, we could also use \texttt{cls.{\_}{\_}metaclass{\_}{\_}} instead of \texttt{cls.{\_}{\_}this}, but this would not work inside \texttt{{\_}{\_}new{\_}{\_}}, whereas \texttt{{\_}{\_}this} would be recognized even in \texttt{{\_}{\_}new{\_}{\_}}. 
\begin{quote} \begin{verbatim}>>> from oopp import * >>> print MRO(WithMultiCounter) 1 - WithMultiCounter(WithCounter)[__metaclass__] 2 - WithCounter(object) 3 - object()\end{verbatim} \end{quote} For the sake of readability, it is often convenient to give a name even to inner classes: \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~WithMultiCounter(WithCounter):}\\ \mbox{~~~~"""Each~time~a~new~subclass~is~derived,~the~counter~is~reset"""}\\ \mbox{~~~~class~ResetsCounter(type):}\\ \mbox{~~~~~~~~def~{\_}{\_}init{\_}{\_}(cls,*args):}\\ \mbox{~~~~~~~~~~~~cls.counter=0}\\ \mbox{~~~~~~~~~~~~super(cls.ResetsCounter,cls).{\_}{\_}init{\_}{\_}(*args)}\\ \mbox{~~~~{\_}{\_}metaclass{\_}{\_}=ResetsCounter}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} Notice that inside super we used the expression \texttt{cls.ResetsCounter} and not \texttt{WithMultiCounter.ResetsCounter}: using the latter would generate a \texttt{NameError: global name 'WithMultiCounter' is not defined} since at the time when \texttt{ResetsCounter.{\_}{\_}init{\_}{\_}} is called for the first time, the class \texttt{WithMultiCounter} exists but has not yet entered the global namespace: this will happen only after the initialization in the \texttt{ResetsCounter} metaclass, as we said before. Without the metaclass one can reset the counter by hand each time, or can reset the counter on all the classes of the hierarchy with a convenient function (akin to the 'traceH' routine defined in chapter 6). 
Example: \begin{quote} \begin{verbatim}>>> from oopp import * >>> class GrandFather(WithMultiCounter): pass >>> class Father(GrandFather): pass >>> class Child(Father): pass >>> GrandFather() <__main__.GrandFather object at 0x402f7f6c> # first GrandFather instance >>> Father() <__main__.Father object at 0x402f79ec> # first Father instance >>> Father() <__main__.Father object at 0x402f7d4c> # second Father instance >>> Child.counter # zero instances 0 >>> Father.counter # two instances 2 >>> GrandFather.counter # one instance 1\end{verbatim} \end{quote} I leave as an exercise for the reader to show that the original 'WithCounter' would fail to count correctly the different subclasses and would put the total number of instances in 'Child'. %___________________________________________________________________________ \hypertarget{passing-parameters-to-meta-classes}{} \pdfbookmark[1]{Passing parameters to (meta) classes}{passing-parameters-to-meta-classes} \subsection*{Passing parameters to (meta) classes} Calling a class with brackets is a way of passing parameters to it (or to its instances, if the 'returnclass' flag is not set). There are additional ways of doing that. One can control the instantiation syntax of classes by redefining the \texttt{{\_}{\_}call{\_}{\_}} method of the metaclass. The point is that when we instantiate an object with the syntax \texttt{c=C()}, Python looks at the \texttt{{\_}{\_}call{\_}{\_}} method of the metaclass of 'C'; the default behaviour is to call \texttt{C.{\_}{\_}new{\_}{\_}} and \texttt{C.{\_}{\_}init{\_}{\_}} in succession; however, that behavior can be overridden. Let me give an example without using anonymous metaclasses (for the sake of clarity only). 
\begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~M(type):~{\#}~this~is~C~metaclass}\\ \mbox{~~~~def~{\_}{\_}call{\_}{\_}(cls):}\\ \mbox{~~~~~~~~return~"Called~M.{\_}{\_}call{\_}{\_}"~}\\ \mbox{}\\ \mbox{C=M('C',(),{\{}{\}})~{\#}~calls~type(M).{\_}{\_}call{\_}{\_}}\\ \mbox{c=C()~{\#}~calls~type(C).{\_}{\_}call{\_}{\_}}\\ \mbox{{\#}~attention:~c~is~a~string!}\\ \mbox{print~c~{\#}=>~Called~M.{\_}{\_}call{\_}{\_}}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} In this example, \texttt{M.{\_}{\_}call{\_}{\_}} simply returns the string \texttt{Called M.{\_}{\_}call{\_}{\_}}, and the class 'C' is \emph{not} instantiated. Overriding the metaclass \texttt{{\_}{\_}call{\_}{\_}} method therefore provides another way to implement the ``Singleton'' pattern. However, savage overridings such as the one in this example are not a good idea, since they will confuse everybody. This is an example where metaclasses change the semantics: whereas usually the notation \texttt{C()} means ``creates a C instance'', the metaclass can give to the syntax \texttt{C()} any meaning we want. Herein lies both the power and the danger of metaclasses: they allow one to make both miracles and disasters. Nevertheless, used with a grain of salt, they provide a pretty nice convenience. Anyway, overriding the '{\_}{\_}call{\_}{\_}' method of the metaclass can be confusing, since parentheses are usually reserved to mean instantiation; therefore I will prefer to pass arguments through brackets. The beauty and the magic of metaclasses lies in the fact that this mechanism is completely general: since metaclasses themselves are classes, we can use 'BracketCallable' to pass arguments to a metaclass, i.e. 'BracketCallable' can also be used as a meta-metaclass! I leave as an exercise for the reader to figure out how to define meta-meta-metaclasses, meta-meta-meta-metaclasses, etc. etc. 
(there is no limit to the abstraction level you can reach with metaclasses;-) Let me show an example: a magical way of making methods cooperative. This can be done through a 'Cooperative' metaclass that inherits from 'BracketCallable' and therefore has 'BracketCallable.{\_}{\_}metaclass{\_}{\_}' as (meta)metaclass: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~Cooperative(BracketCallable,type):}\\ \mbox{~~~~"""Bracket-callable~metaclass~implementing~cooperative~methods.~Works}\\ \mbox{~~~~well~for~plain~methods~returning~None,~such~as~{\_}{\_}init{\_}{\_}"""}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(cls,*args):}\\ \mbox{~~~~~~~~methods=cls.bracket{\_}args}\\ \mbox{~~~~~~~~for~meth~in~methods:~}\\ \mbox{~~~~~~~~~~~~setattr(cls,meth,cls.coop{\_}method(meth,vars(cls).get(meth)))}\\ \mbox{~~~~def~coop{\_}method(cls,name,method):~{\#}~method~can~be~None}\\ \mbox{~~~~~~~~"""Calls~both~the~superclass~method~and~the~class~method~(if~the~}\\ \mbox{~~~~~~~~class~has~an~explicit~method).~Implemented~via~a~closure"""}\\ \mbox{~~~~~~~~def~{\_}(self,*args,**kw):}\\ \mbox{~~~~~~~~~~~~getattr(super(cls,self),name)(*args,**kw)~{\#}~call~the~supermethod}\\ \mbox{~~~~~~~~~~~~if~method:~method(self,*args,**kw)~{\#}~call~the~method}\\ \mbox{~~~~~~~~return~{\_}}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} The code above works for methods returning \texttt{None}, such as \texttt{{\_}{\_}init{\_}{\_}}. Here I give a first example of application: a hierarchy where the \texttt{{\_}{\_}init{\_}{\_}} methods are automatically called (similar to automatic initialization in Java). 
\begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{from~oopp~import~Cooperative}\\ \mbox{}\\ \mbox{class~B(object):}\\ \mbox{~~~~"""Cooperative~base~class;~all~its~descendants~will~automagically~}\\ \mbox{~~~~invoke~their~ancestors~{\_}{\_}init{\_}{\_}~methods~in~chain."""}\\ \mbox{~~~~{\_}{\_}metaclass{\_}{\_}=Cooperative['{\_}{\_}init{\_}{\_}']}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(self,*args,**kw):}\\ \mbox{~~~~~~~~print~"This~is~B.{\_}{\_}init{\_}{\_}"}\\ \mbox{}\\ \mbox{class~C(B):}\\ \mbox{~~~~"Has~not~explicit~{\_}{\_}init{\_}{\_}"}\\ \mbox{}\\ \mbox{class~D(C):}\\ \mbox{~~~~"""The~metaclass~makes~D.{\_}{\_}init{\_}{\_}~to~call~C.{\_}{\_}init{\_}{\_}~and~}\\ \mbox{~~~~therefore~B.{\_}{\_}init{\_}{\_}"""}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(self,*args,**kw):}\\ \mbox{~~~~~~~~print~"This~is~D.{\_}{\_}init{\_}{\_}"}\\ \mbox{}\\ \mbox{d=D()}\\ \mbox{}\\ \mbox{print~"The~metaclass~of~B~is",type(B)}\\ \mbox{print~"The~meta-metaclass~of~B~is",~type(type(B))}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Output: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{This~is~B.{\_}{\_}init{\_}{\_}}\\ \mbox{This~is~D.{\_}{\_}init{\_}{\_}}\\ \mbox{The~metaclass~of~B~is~}\\ \mbox{The~meta-metaclass~of~B~~is~} \end{flushleft}\end{ttfamily} \end{quote} A second example, is the following, an alternative way of making the paleoanthropological hierarchy of chapter 4 cooperative: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{from~oopp~import~Cooperative,Homo}\\ \mbox{}\\ \mbox{class~HomoHabilis(Homo):}\\ \mbox{~~~~{\_}{\_}metaclass{\_}{\_}=Cooperative['can']}\\ \mbox{~~~~def~can(self):}\\ \mbox{~~~~~~~~print~"~-~make~tools"}\\ \mbox{}\\ \mbox{class~HomoSapiens(HomoHabilis):}\\ \mbox{~~~~def~can(self):}\\ \mbox{~~~~~~~~print~"~-~make~abstractions"}\\ \mbox{~~~~}\\ \mbox{class~HomoSapiensSapiens(HomoSapiens):}\\ \mbox{~~~~def~can(self):}\\ \mbox{~~~~~~~~print~"~-~make~art"}\\ \mbox{}\\ 
\mbox{HomoSapiensSapiens().can()}\\ \mbox{}\\ \mbox{{\#}~Output:}\\ \mbox{}\\ \mbox{{\#}~~can:}\\ \mbox{{\#}~~-~make~tools}\\ \mbox{{\#}~~-~make~abstractions}\\ \mbox{{\#}~~-~make~art}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Metaclasses can be used to violate the good old rule ``explicit is better than implicit''. Looking at the source code for 'HomoSapiens' and 'HomoSapiensSapiens' one would never imagine that the \texttt{can} method is somewhat special. That is why in the following I will prefer to use the anonymous super call mechanism, which is explicit, instead of the implicit cooperative mechanism. %___________________________________________________________________________ \hypertarget{meta-functions}{} \pdfbookmark[1]{Meta-functions}{meta-functions} \subsection*{Meta-functions} The third and deepest secret of the \texttt{{\_}{\_}metaclass{\_}{\_}} hook is that, even if it is typically used in conjunction with metaclasses, actually the hook can refer to generic class factories callable with the signature \texttt{(name,bases,dic)}. 
Let me show a few examples where \texttt{{\_}{\_}metaclass{\_}{\_}} is a function or a generic callable object instead of being a metaclass: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{from~oopp~import~kwdict}\\ \mbox{}\\ \mbox{class~Callable(object):}\\ \mbox{~~~~def~{\_}{\_}call{\_}{\_}(self,name,bases,dic):}\\ \mbox{~~~~~~~~print~name,bases,'{\textbackslash}n',kwdict(dic)}\\ \mbox{~~~~~~~~return~type(name,bases,dic)}\\ \mbox{}\\ \mbox{callableobj=Callable()}\\ \mbox{}\\ \mbox{class~C:~{\_}{\_}metaclass{\_}{\_}=callableobj}\\ \mbox{}\\ \mbox{print~"type~of~C:",C.{\_}{\_}class{\_}{\_}}\\ \mbox{}\\ \mbox{def~f(name,bases,dic):}\\ \mbox{~~~~print~name,bases,'{\textbackslash}n',kwdict(dic)}\\ \mbox{~~~~return~type(name,bases,dic)}\\ \mbox{}\\ \mbox{class~D:~{\_}{\_}metaclass{\_}{\_}=f}\\ \mbox{}\\ \mbox{print~"type~of~D:",D.{\_}{\_}class{\_}{\_}}\\ \mbox{}\\ \mbox{class~B(object):}\\ \mbox{~~~~def~{\_}{\_}metaclass{\_}{\_}(name,bases,dic):}\\ \mbox{~~~~~~~~"""In~this~form,~the~{\_}{\_}metaclass{\_}{\_}~attribute~is~a~function.~}\\ \mbox{~~~~~~~~In~practice,~it~works~as~a~special~static~method~analogous~}\\ \mbox{~~~~~~~~to~{\_}{\_}new{\_}{\_}"""}\\ \mbox{~~~~~~~~print~"name:~",~name}\\ \mbox{~~~~~~~~print~"bases:",~bases}\\ \mbox{~~~~~~~~print~"dic:{\textbackslash}n",kwdict(dic)}\\ \mbox{~~~~~~~~return~type(name,bases,dic)}\\ \mbox{}\\ \mbox{class~E(B):~pass}\\ \mbox{}\\ \mbox{print~"type~of~E:",E.{\_}{\_}class{\_}{\_}}\\ \mbox{print~"Non-called~E.{\_}{\_}metaclass{\_}{\_}:",~E.{\_}{\_}metaclass{\_}{\_}}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} With output \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{C~()~}\\ \mbox{{\_}{\_}metaclass{\_}{\_}~=~}\\ \mbox{{\_}{\_}module{\_}{\_}~=~{\_}{\_}builtin{\_}{\_}}\\ \mbox{type~of~C:~}\\ \mbox{D~()~}\\ \mbox{{\_}{\_}metaclass{\_}{\_}~=~}\\ \mbox{{\_}{\_}module{\_}{\_}~=~{\_}{\_}builtin{\_}{\_}}\\ \mbox{type~of~D:~}\\ \mbox{name:~~B}\\ \mbox{bases:~(,)}\\ 
\mbox{dic:~}\\ \mbox{{\_}{\_}metaclass{\_}{\_}~=~}\\ \mbox{{\_}{\_}module{\_}{\_}~=~{\_}{\_}builtin{\_}{\_}}\\ \mbox{type~of~E:~}\\ \mbox{Non-called~E.{\_}{\_}metaclass{\_}{\_}:~} \end{flushleft}\end{ttfamily} \end{quote} The advantage/disadvantage of this solution is that the \texttt{{\_}{\_}metaclass{\_}{\_}} hook is called only once, i.e. it is not called again if a new class is derived from the original one. For instance in this example 'E' is derived from 'B', but the function \texttt{B.{\_}{\_}metaclass{\_}{\_}} is \emph{not} called during the creation of 'E'. Metafunctions can also be used when one does not want to transmit the metaclass constraint. Therefore their usage is convenient in exactly the opposite situation of a cooperative metaclass. %___________________________________________________________________________ \hypertarget{anonymous-cooperative-super-calls}{} \pdfbookmark[1]{Anonymous cooperative super calls}{anonymous-cooperative-super-calls} \subsection*{Anonymous cooperative super calls} As I noted in the previous chapters, the \texttt{super} mechanism has an annoying problem: one needs to pass explicitly the name of the base class. Typically, this is simply an inelegance since it is annoying to be forced to retype the name of the base class. However, in particular cases, it can be a problem. This happens for instance if we try to pass the class's methods to a different class: one cannot do that, since the methods contain an explicit reference to the original class and would not work with the new one. Moreover, having named super calls is annoying in view of refactoring. Consider for instance the previous \texttt{supernew.py} script: in the \texttt{{\_}{\_}new{\_}{\_}} method defined inside the class 'B', we called \texttt{Super} with the syntax \texttt{Super(B,cls)} by repeating the name of the class 'B'. 
Now, if in the following I decide to give to 'B' a more descriptive name, I have to go through the source, search all the \texttt{super} calls, and change them according to the new name. It would be nice having Python do the job for me. A first solution is to call \texttt{super} (or \texttt{Super}) with the syntax \texttt{super(self.{\_}{\_}this,obj)}, where the special name \texttt{{\_}{\_}this} is explicitly replaced by the name of the class where the call is defined by the 'reflective' function of the last chapter. This approach has the disadvantage that each time we derive a new class, we need to invoke \emph{explicitly} the routine \texttt{reflective}. It would be marvelous to instruct Python to invoke \texttt{reflective} automatically at each class creation. Actually, this seems to be deep magic and indeed it is: fortunately, a custom metaclass can perform this deep magic in a few lines: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{~~}\\ \mbox{class~Reflective(type):}\\ \mbox{~~~~"""Cooperative~metaclass~that~defines~the~private~variable~{\_}{\_}this~in}\\ \mbox{~~~~its~instances.~{\_}{\_}this~contains~a~reference~to~the~class,~therefore}\\ \mbox{~~~~it~allows~anonymous~cooperative~super~calls~in~the~class."""}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(cls,*args):}\\ \mbox{~~~~~~~~super(Reflective,cls).{\_}{\_}init{\_}{\_}(*args)}\\ \mbox{~~~~~~~~reflective(cls)}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Now, let me show how 'Reflective' can be used in a practical example. By deriving new metaclasses from 'Reflective', one can easily create powerful class factories that generate reflective classes. Suppose I want to define a handy class factory with the ability of counting the number of its instances. This can be done by noticing that metaclasses are just classes, therefore they can be composed with regular classes in multiple inheritance. 
In particular one can derive a 'Logged' metaclass from 'WithLogger': in this way we send a message to a log file each time a new class is created. This can be done by composing 'WithLogger' with 'WithMultiCounter.{\_}{\_}metaclass{\_}{\_}' and with 'Reflective': \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~Logged(WithLogger,Reflective):~}\\ \mbox{~~~~"""Metaclass~that~reuses~the~features~provided~by~WithLogger.~In~particular}\\ \mbox{~~~~the~classes~created~by~Logged~are~Reflective,~PrettyPrinted~}\\ \mbox{~~~~and~Customizable."""~{\#}WithLogger~provides~logfile~and~verboselog}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(cls,*args,**kw):}\\ \mbox{~~~~~~~~super(Logged,cls).{\_}{\_}init{\_}{\_}(*args,**kw)~}\\ \mbox{~~~~~~~~bases=','.join([c.{\_}{\_}name{\_}{\_}~for~c~in~cls.{\_}{\_}bases{\_}{\_}])}\\ \mbox{~~~~~~~~print~>>~cls.logfile,~"{\%}s~is~a~child~of~{\%}s"~{\%}~(cls,bases)}\\ \mbox{~~~~~~~~print~>>~cls.logfile,'and~an~instance~of~{\%}s'~{\%}~type(cls).{\_}{\_}name{\_}{\_}}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} The MRO is \begin{quote} \begin{verbatim}>>> print MRO(Logged) MRO of Logged: 0 - Logged(WithLogger,Reflective) 1 - WithLogger(WithCounter,PrettyPrinted) 2 - WithCounter(object) 3 - PrettyPrinted(object) 4 - Reflective(type) 5 - type(object) 6 - object()\end{verbatim} \end{quote} and the inheritance graph can be drawn as follows: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{~~~~~~~{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}{\_}~object~6~{\_}{\_}{\_}}\\ \mbox{~~~~~~/~~~~~~~~~~~~~~~~~~~~~~~~/~~~~~~~~~{\textbackslash}}\\ \mbox{2~WithCounter~~~~~~~~3~PrettyPrinted~~~~~~~~type~5}\\ \mbox{~~~~~~~~~{\textbackslash}~~~~~~~~~~~~~~~~/~~~~~~~~~~~~~~~/}\\ \mbox{~~~~~~~~~~{\textbackslash}~~~~~~~~~~~~~~/~~~~~~~~~~~~~~~/}\\ \mbox{~~~~~~~~~~~{\textbackslash}~~~~~~~~~~~~/~~~~~~~~~~~~~~~/}\\ \mbox{~~~~~~~~~~~~{\textbackslash}~~~~~~~~~~/~~~~~~~~~~~~~~~/}\\ 
\mbox{~~~~~~~~~~~~~{\textbackslash}~~~~~~~~/~~~~~~~~~~~~~~~/}\\ \mbox{~~~~~~~~~~~~~~{\textbackslash}~~~~~~/~~~~~~~~~~~~~~~/}\\ \mbox{~~~~~~~~~~~~~1~WithLogger~~~~~~~Reflective~4}\\ \mbox{~~~~~~~~~~~~~~~~~~{\textbackslash}~~~~~~~~~~~/}\\ \mbox{~~~~~~~~~~~~~~~~~~~{\textbackslash}~~~~~~~~~/}\\ \mbox{~~~~~~~~~~~~~~~~~~~~{\textbackslash}~~~~~~~/}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~{\textbackslash}~~~~~/}\\ \mbox{~~~~~~~~~~~~~~~~Logged~0}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~:}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~:}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~C1} \end{flushleft}\end{ttfamily} \end{quote} 'WithCounter' acts now as a metaclass, since WithCounter.{\_}{\_}new{\_}{\_} invokes type.{\_}{\_}new{\_}{\_}. Since \texttt{type.{\_}{\_}new{\_}{\_}} is non-cooperative, in the composition of a metaclass with a regular class, the metaclass should be put first: this guarantees that \texttt{{\_}{\_}new{\_}{\_}} derives from \texttt{type.{\_}{\_}new{\_}{\_}}, thus avoiding the error message. \begin{quote} \begin{verbatim}>>> Logged.verboselog=True >>> C1=Logged('C1',(),{}) ***************************************************************************** Tue Apr 22 18:47:05 2003 1. Created 'C1' with accessibile non-special attributes: _C1__this = 'C1' 'C1' is a child of object and an instance of Logged\end{verbatim} \end{quote} Notice that any instance of 'WithCounterReflective' inherits the 'WithCounter' attribute \texttt{counter}, that counts the number of classes that have been instantiated (however it is not retrieved by \texttt{dir}; moreover the instances of 'WithCounterReflective' instances have no \texttt{counter} attribute). 
\begin{quote} \begin{verbatim}>>> C1.counter 1\end{verbatim} \end{quote} %___________________________________________________________________________ \hypertarget{more-on-metaclasses-as-class-factories}{} \pdfbookmark[1]{More on metaclasses as class factories}{more-on-metaclasses-as-class-factories} \subsection*{More on metaclasses as class factories} A slight disadvantage of the approach just described, is that 'Logged' cooperatively invokes the \texttt{type.{\_}{\_}new{\_}{\_}} static method, therefore, when we invoke the metaclass, we must explicitly provide a name, a tuple of base classes and a dictionary, since the \texttt{type.{\_}{\_}new{\_}{\_}} staticmethod requires that signature. Actually, the expression \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{C=Logged(name,bases,dic)} \end{flushleft}\end{ttfamily} \end{quote} is roughly syntactic sugar for \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{C=Logged.{\_}{\_}new{\_}{\_}(Logged,name,bases,dic)~}\\ \mbox{assert~isinstance(C,Logged)}\\ \mbox{Logged.{\_}{\_}init{\_}{\_}(C,name,bases,dic)} \end{flushleft}\end{ttfamily} \end{quote} If a different interface is desired, the best way is to use a class factory 'ClsFactory' analogous to the object factory 'Makeobj' defined in chapter 4. It is convenient to make 'ClsFactory' bracket-callable. 
\begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~ClsFactory(BracketCallable):}\\ \mbox{~~~~"""Bracket~callable~non-cooperative~class~acting~as~}\\ \mbox{~~~~a~factory~of~class~factories.}\\ \mbox{}\\ \mbox{~~~~ClsFactory~instances~are~class~factories~accepting~0,1,2~or~3~arguments.~}\\ \mbox{~~.~They~automatically~converts~functions~to~static~methods~}\\ \mbox{~~~~if~the~input~object~is~not~a~class.~If~an~explicit~name~is~not~passed}\\ \mbox{~~~~the~name~of~the~created~class~is~obtained~by~adding~an~underscore~to~}\\ \mbox{~~~~the~name~of~the~original~object."""}\\ \mbox{~~~~}\\ \mbox{~~~~returnclass=False~{\#}~ClsFactory[X]~returns~an~*instance*~of~ClsFactory}\\ \mbox{}\\ \mbox{~~~~def~{\_}{\_}call{\_}{\_}(self,~*args):}\\ \mbox{~~~~~~~~"""Generates~a~new~class~using~self.meta~and~avoiding~conflicts.}\\ \mbox{~~~~~~~~The~first~metaobject~can~be~a~dictionary,~an~object~with~a}\\ \mbox{~~~~~~~~dictionary~(except~a~class),~or~a~simple~name."""}\\ \mbox{~~~~~~~~}\\ \mbox{~~~~~~~~{\#}~default~attributes}\\ \mbox{~~~~~~~~self.name="CreatedWithClsFactory"~~~~}\\ \mbox{~~~~~~~~self.bases=()}\\ \mbox{~~~~~~~~self.dic={\{}{\}}}\\ \mbox{~~~~~~~~self.metas=self.bracket{\_}args}\\ \mbox{}\\ \mbox{~~~~~~~~if~len(args)==1:}\\ \mbox{~~~~~~~~~~~~arg=args[0]}\\ \mbox{~~~~~~~~~~~~if~isinstance(arg,str):~{\#}~is~a~name~}\\ \mbox{~~~~~~~~~~~~~~~~self.name=arg}\\ \mbox{~~~~~~~~~~~~elif~hasattr(arg,'{\_}{\_}name{\_}{\_}'):~{\#}~has~a~name}\\ \mbox{~~~~~~~~~~~~~~~~self.name=arg.{\_}{\_}name{\_}{\_}+'{\_}'}\\ \mbox{~~~~~~~~~~~~self.setbasesdic(arg)}\\ \mbox{~~~~~~~~elif~len(args)==2:~}\\ \mbox{~~~~~~~~~~~~self.name=args[0]~}\\ \mbox{~~~~~~~~~~~~assert~isinstance(self.name,str)~{\#}~must~be~a~name}\\ \mbox{~~~~~~~~~~~~self.setbasesdic(args[1])}\\ \mbox{~~~~~~~~elif~len(args)==3:~{\#}~must~be~name,bases,dic}\\ \mbox{~~~~~~~~~~~~self.name=args[0]}\\ \mbox{~~~~~~~~~~~~self.bases+=args[1]}\\ \mbox{~~~~~~~~~~~~self.dic.update(args[2])}\\ 
\mbox{~~~~~~~~if~len(args)<3~and~not~self.bases:~{\#}~creating~class~from~a~non-class}\\ \mbox{~~~~~~~~~~~~for~k,v~in~self.dic.iteritems():}\\ \mbox{~~~~~~~~~~~~~~~~if~isfunction(v):~self.dic[k]=staticmethod(v)}\\ \mbox{~~~~~~~~{\#}return~child(*self.bases,**vars(self))}\\ \mbox{~~~~~~~~return~makecls(*self.metas)(self.name,self.bases,self.dic)}\\ \mbox{}\\ \mbox{~~~~def~setbasesdic(self,obj):}\\ \mbox{~~~~~~~~if~isinstance(obj,tuple):~{\#}~is~a~tuple}\\ \mbox{~~~~~~~~~~~~self.bases+=obj}\\ \mbox{~~~~~~~~elif~hasattr(obj,'{\_}{\_}bases{\_}{\_}'):~{\#}~is~a~class}\\ \mbox{~~~~~~~~~~~~self.bases+=obj.{\_}{\_}bases{\_}{\_}}\\ \mbox{~~~~~~~~if~isinstance(obj,dict):~{\#}~is~a~dict}\\ \mbox{~~~~~~~~~~~~self.dic.update(obj)}\\ \mbox{~~~~~~~~elif~hasattr(obj,"{\_}{\_}dict{\_}{\_}"):~{\#}~has~a~dict}\\ \mbox{~~~~~~~~~~~~self.dic.update(obj.{\_}{\_}dict{\_}{\_})}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} 'ClsFactory[X]' where 'X' is a metaclass returns callable objects acting as class factories. For instance \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{Class=ClsFactory[type]~{\#}~generates~non-conflicting~classes}\\ \mbox{Mixin=ClsFactory[Reflective]~{\#}~generates~reflective~classes}\\ \mbox{~~}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} can be used as a class factories that automatically provides a default name, base classes and dictionary, and avoids meta-type conflicts. 'Mixin' generates reflective classes that can be used as mixin in multiple inheritance hierarchies. 
Here I give a few examples of usage of 'Class': \begin{quote} \begin{verbatim}>>> from oopp import * >>> C1,C2,C3=[Class('C'+str(i+1)) for i in range(3)] >>> C1 >>> C2 >>> C3 ]\end{verbatim} \begin{verbatim}>>> Clock=Class('Clock',{'get_time':get_time}) >>> Clock >>> Clock.get_time() 16:01:02\end{verbatim} \end{quote} Another typical usage of 'Class' is the conversion of a module into a class: for instance \begin{quote} \begin{verbatim}>>> time_=Class(time) >>> time_ \end{verbatim} \end{quote} Notice the convention of adding an underscore to the name of the class generated from the 'time' module. \begin{quote} \begin{verbatim}>>> time_.asctime() 'Mon Jan 20 16:33:21 2003'\end{verbatim} \end{quote} Notice that all the functions in the module \texttt{time} have been magically converted into staticmethods of the class \texttt{time{\_}}. An advantage of this approach is that now the module is a class and can be enhanced with metaclasses: for instance we could add tracing capabilities, debugging features, etc. By design, 'Class' and 'Mixin' also work when the first argument is a class or a tuple of base classes: \begin{quote} \begin{verbatim}>>> ClsFactory_=Class(ClsFactory) >>> type(ClsFactory_) >>> ClsFactory_=Mixin(ClsFactory) >>> type(ClsFactory_) # automagically generated metaclass \end{verbatim} \end{quote} %___________________________________________________________________________ \hypertarget{programming-with-metaclasses}{} \pdfbookmark[1]{Programming with metaclasses}{programming-with-metaclasses} \subsection*{Programming with metaclasses} In order to show a non-trivial application of metaclasses in real life, let me come back to the pizza shop example discussed in chapters 4 and 6. 
\begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~Pizza(toppings,**dic):~}\\ \mbox{~~~~~"""This~function~produces~classes~inheriting~from~GenericPizza~and~}\\ \mbox{~~~~~WithLogger,~using~a~metaclass~inferred~from~Logged"""}\\ \mbox{~~~~~toppinglist=toppings.split()}\\ \mbox{~~~~~name='Pizza'+''.join([n.capitalize()~for~n~in~toppinglist])}\\ \mbox{~~~~~dic['toppinglist']=toppinglist}\\ \mbox{~~~~~return~ClsFactory[Logged](name,}\\ \mbox{~~~~~~~~~~~~(GenericPizza,WithLogger,WithMultiCounter),dic)}\\ \mbox{}\\ \mbox{{\#}}\\ \mbox{}\\ \mbox{>>>~from~oopp~import~*}\\ \mbox{>>>~Margherita=Pizza('tomato~mozzarella',verboselog=True)}\\ \mbox{*****************************************************************************}\\ \mbox{Tue~May~13~14:42:17~2003}\\ \mbox{1.~Created~'PizzaTomatoMozzarella'}\\ \mbox{with~accessibile~non-special~attributes:}\\ \mbox{ResetsCounter~=~}\\ \mbox{{\_}GenericPizza{\_}{\_}this~=~}\\ \mbox{{\_}WithCounter{\_}{\_}this~=~}\\ \mbox{{\_}WithLogger{\_}{\_}this~=~}\\ \mbox{baseprice~=~1}\\ \mbox{counter~=~0}\\ \mbox{formatstring~=~{\%}s}\\ \mbox{logfile~=~',~mode~'w'~at~0x402c2058>}\\ \mbox{price~=~}\\ \mbox{sizefactor~=~{\{}'small':~1,~'large':~3,~'medium':~2{\}}}\\ \mbox{topping{\_}unit{\_}price~=~0.5}\\ \mbox{toppinglist~=~['tomato',~'mozzarella']}\\ \mbox{toppings{\_}price~=~}\\ \mbox{verboselog~=~True}\\ \mbox{'PizzaTomatoMozzarella'~is~a~child~of~GenericPizza,WithLogger,}\\ \mbox{WithMultiCounter~and~an~instance~of~{\_}LoggedResetsCounter} \end{flushleft}\end{ttfamily} \end{quote} Notice the \emph{deep} magic: \texttt{Pizza} invokes \texttt{ClsFactory[Logged]} which in turns calls the class factory \texttt{child} that creates 'Margherita' from 'GenericPizza', 'WithLogger' and 'WithMultiCounter' by using the metaclass 'Logged': however, since 'WithMultiCounter', has the internal metaclass 'ResetsCounter' , there is a metatype conflict: \texttt{child} \emph{automagically} solves the conflict by creating the 
metaclass '{\_}LoggedResetsCounter' that inherits both from 'Logged' and 'ResetsCounter'. At this point, 'Margherita' can be safely created by '{\_}LoggedResetsCounter'. As such, the creation of 'Margherita' will be registered in the log file and 'Margherita' (with all its children) will continue to be able to recognize the special identifier \texttt{this}. \begin{quote} \begin{verbatim}>>> print Margherita('large') ***************************************************************************** Tue May 13 14:47:03 2003 1. Created large pizza with tomato,mozzarella, cost $ 6.0 with accessibile non-special attributes: ResetsCounter = _GenericPizza__this = _WithCounter__this = _WithLogger__this = baseprice = 1 counter = 1 formatstring = %s logfile = ', mode 'w' at 0x402c2058> price = > size = large sizefactor = {'small': 1, 'large': 3, 'medium': 2} topping_unit_price = 0.5 toppinglist = ['tomato', 'mozzarella'] toppings_price = > verboselog = True large pizza with tomato,mozzarella, cost $ 6.0 >>> print MRO(Margherita) MRO of PizzaTomatoMozzarella: 0 - PizzaTomatoMozzarella(GenericPizza,WithLogger)[_LoggedResetsCounter] 1 - GenericPizza(object) 2 - WithLogger(WithCounter,Customizable,PrettyPrinted) 3 - WithMultiCounter(WithCounter)[ResetsCounter] 4 - WithCounter(object) 5 - PrettyPrinted(object) 6 - object()\end{verbatim} \end{quote} Notice that \begin{quote} \begin{verbatim}>>> print Margherita 'PizzaTomatoMozzarella'\end{verbatim} \end{quote} The power of inheritance in this example is quite impressive, since I have reused the same class 'WithLogger' (and its children) both in the metaclass hierarchy and in the regular hierarchy: this means that I have added logging capabilities both to classes and their instances in a strike! And there is no confusion between the two. For instance, there is a \texttt{counter} attribute for the metaclass 'Logged' and many independent \texttt{counter} attributes for any generated class, i.e. for any kind of pizza. 
\begin{quote} It is interesting to notice that '' itself is an instance of its inner metaclass, as \texttt{type()} would show. This technique avoids the need for inventing a new name for the metaclass. The inner metaclass is automatically inherited by classes inheriting from the outer class. \end{quote} %___________________________________________________________________________ \hypertarget{metaclass-aided-operator-overloading}{} \pdfbookmark[1]{Metaclass-aided operator overloading}{metaclass-aided-operator-overloading} \subsection*{Metaclass-aided operator overloading} As we discussed in chapter 4, inheriting from built-in types is generally painful. The problem is that if P is a primitive class, i.e. a Python built-in type, and D=D(P) is a derived class, then the primitive methods returning P-objects have to be modified (wrapped) in such a way as to return D-objects. The problem is especially clear in the context of operator overloading. Consider for instance the problem of defining a 'Vector' class in the mathematical sense. Mathematically speaking, vectors are defined as objects that can be summed with each other and multiplied by numbers; they can be represented by (finite or infinite) sequences. 
In the case of finite sequences, vectors can be represented with lists and a vector class can be naturally implemented by subclassing \texttt{list}: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~Vector(list):}\\ \mbox{~~~~"""Implements~finite~dimensional~vectors~as~lists.~Can~be~instantiated}\\ \mbox{~~~~as~Vector([a,b,c,..])~or~as~Vector(a,b,c~..)"""}\\ \mbox{~~~~def~{\_}{\_}add{\_}{\_}(self,other):}\\ \mbox{~~~~~~~~return~[el+other[i]~for~i,el~in~enumerate(self)]}\\ \mbox{~~~~{\_}{\_}radd{\_}{\_}={\_}{\_}add{\_}{\_}}\\ \mbox{~~~~def~{\_}{\_}mul{\_}{\_}(self,scalar):}\\ \mbox{~~~~~~~~return~[el*scalar~for~el~in~self]}\\ \mbox{~~~~def~{\_}{\_}rmul{\_}{\_}(self,scalar):}\\ \mbox{~~~~~~~~return~[scalar*el~for~el~in~self]}\\ \mbox{}\\ \mbox{v=Vector([1,0])}\\ \mbox{w=Vector([0,1])}\\ \mbox{}\\ \mbox{print~v+w,~type(v+w)~}\\ \mbox{print~2*v,~type(2*v)~}\\ \mbox{print~v*2,~type(v*2)~}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} With output \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{[1,~1]~}\\ \mbox{[2,~0]~}\\ \mbox{[2,~0]~} \end{flushleft}\end{ttfamily} \end{quote} The problem is that the overloaded methods must be wrapped in such a way to return \texttt{Vector} object and not \texttt{list} object; moreover, if \texttt{Vector} is subclassed (for instance by defining a \texttt{NumericVector}), the overloaded methods must return instances of the subclass. There is only one way of doing that automatically: trough the magic of metaclasses. Here is the solution, involving an \texttt{autowrappedmethod} descriptor class, that wraps the overloaded operators and is automatically invoked by the metaclass \texttt{AutoWrapped}. 
\begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~autowrappedmethod(wrappedmethod):}\\ \mbox{~~~~"""Makes~the~method~returning~cls~instances,~by~wrapping~its}\\ \mbox{~~~~output~with~cls"""}\\ \mbox{~~~~klass=None~{\#}~has~to~be~fixed~dynamically~from~outside}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(self,meth):}\\ \mbox{~~~~~~~~super(autowrappedmethod,self).{\_}{\_}init{\_}{\_}(meth)~{\#}~cooperative}\\ \mbox{~~~~~~~~self.klass=self.klass~{\#}~class~variable~->~instance~variable}\\ \mbox{~~~~def~wrapper(self):~{\#}~closure}\\ \mbox{~~~~~~~~return~lambda~*args,**kw:~self.klass(self.func(*args,**kw))}\\ \mbox{}\\ \mbox{class~AutoWrapped(type):}\\ \mbox{~~~~"""Metaclass~that~looks~at~the~methods~declared~in~the~attributes~}\\ \mbox{~~~~builtinlist~and~wraplist~of~its~instances~and~wraps~them~with}\\ \mbox{~~~~autowrappedmethod."""}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(cls,name,bases,dic):}\\ \mbox{~~~~~~~~super(AutoWrapped,cls).{\_}{\_}init{\_}{\_}(name,bases,dic)~{\#}~cooperative}\\ \mbox{~~~~~~~~cls.builtinlist=getattr(cls,'builtinlist',[])}\\ \mbox{~~~~~~~~if~not~hasattr(cls,'diclist')~:~{\#}~true~only~at~the~first~call}\\ \mbox{~~~~~~~~~~~~cls.diclist=[(a,vars(bases[0])[a])~for~a~in~cls.builtinlist]}\\ \mbox{~~~~~~~~if~dic.has{\_}key('wraplist'):~{\#}~can~be~true~at~any~call}\\ \mbox{~~~~~~~~~~~~cls.diclist+=[(a,dic[a])~for~a~in~cls.wraplist]~}\\ \mbox{~~~~~~~~wrapper=autowrappedmethod.With(klass=cls)}\\ \mbox{~~~~~~~~d=dict([(a,wrapper(v))~for~a,v~in~cls.diclist])}\\ \mbox{~~~~~~~~customize(cls,**d)}\\ \mbox{~~~~}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Now the \texttt{Vector} class can be written as \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~Vector(list):}\\ \mbox{~~~~"""Implements~finite~dimensional~vectors~as~lists.~Can~be~instantiated}\\ \mbox{~~~~as~Vector([a,b,c,..])~or~as~Vector(a,b,c~..)"""}\\ \mbox{~~~~{\_}{\_}metaclass{\_}{\_}=AutoWrapped}\\ 
\mbox{~~~~wraplist='{\_}{\_}add{\_}{\_}~{\_}{\_}radd{\_}{\_}~{\_}{\_}mul{\_}{\_}~{\_}{\_}rmul{\_}{\_}'.split()}\\ \mbox{~~~~def~{\_}{\_}add{\_}{\_}(self,other):}\\ \mbox{~~~~~~~~return~[el+other[i]~for~i,el~in~enumerate(self)]}\\ \mbox{~~~~{\_}{\_}radd{\_}{\_}={\_}{\_}add{\_}{\_}}\\ \mbox{~~~~def~{\_}{\_}mul{\_}{\_}(self,scalar):}\\ \mbox{~~~~~~~~return~[scalar*el~for~el~in~self]}\\ \mbox{~~~~def~{\_}{\_}rmul{\_}{\_}(self,scalar):}\\ \mbox{~~~~~~~~return~[el*scalar~for~el~in~self]}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Here the \texttt{AutoWrapped} metaclass wraps the output of \texttt{{\_}{\_}add{\_}{\_}, {\_}{\_}radd{\_}{\_}, {\_}{\_}mul{\_}{\_}, {\_}{\_}rmul{\_}{\_}}, guaranteeing that they returns \texttt{Vector} instances or instances of some subclass of \texttt{Vector}, if \texttt{Vector} is subclassed. This is an example of usage: \begin{quote} % doctest \begin{verbatim}>>> from oopp import Vector >>> v=Vector([1,0]) >>> v >>> w=Vector([0,1]) >>> v+2*w >>> print v+2*w [1, 2]\end{verbatim} \end{quote} It should be clear by now that metaclasses are the natural framework where to discuss operator overloading (at least in languages that have metaclasses ;-). After all, operator overloading is another kind of (very nice) syntactic sugar and we know already that metaclasses are very good when we need syntactic sugar. %___________________________________________________________________________ \hypertarget{advanced-metaprogramming-techniques}{} \pdfbookmark[0]{ADVANCED METAPROGRAMMING TECHNIQUES}{advanced-metaprogramming-techniques} \section*{ADVANCED METAPROGRAMMING TECHNIQUES} In elementary OOP, the programmer works with objects; in advanced OOP, the programmer works with classes, taking full advantage of (multiple) inheritance and metaclasses. Metaprograming is the activity of building, composing and modifying classes. 
I will give various examples of metaprogramming techniques using run-time class modifications, multiple inheritance, metaclasses, attribute descriptors and even simple functions. Moreover, I will show how metaclasses can change the semantics of Python programs: hence their reputation of \emph{black} magic. That is to say that the techniques explained here are dangerous! %___________________________________________________________________________ \hypertarget{on-code-processing}{} \pdfbookmark[1]{On code processing}{on-code-processing} \subsection*{On code processing} It is a good programming practice to avoid the direct modification of source code. Nevertheless, there are situations where the ability to modify the source code \emph{dynamically} is invaluable. Python has the capability of \newcounter{listcnt31} \begin{list}{\arabic{listcnt31})} { \usecounter{listcnt31} \setlength{\rightmargin}{\leftmargin} } \item {} generating new code from scratch; \item {} modifying pre-existing source code; \item {} executing the newly created/modified code at run-time. \end{list} The capability of creating source code and executing it \emph{immediately} has no equivalent in static languages such as C/C++/Java and it is maybe the most powerful feature of dynamic languages such as Java/Python/Perl. This feature has been exploited to its ultimate consequences in the languages of the Lisp family, in which one can use incredibly powerful macros which, in a broad sense, are programs that write themselves. In this chapter I will discuss how to implement macros in Python and I will present some of the miracles you may perform with this technique. To this aim, I will discuss various ways of manipulating Python source code, by using regular expressions and state machines. 
%___________________________________________________________________________ \hypertarget{regular-expressions}{} \pdfbookmark[1]{Regular expressions}{regular-expressions} \subsection*{Regular expressions} \begin{quote} \begin{flushleft} \emph{Some~people,~when~confronted~with~a~problem,~~\\ think~``I~know,~I'll~use~regular~expressions.''~~\\ Now~they~have~two~problems.}~\\ ~~~--~Jamie~Zawinski \end{flushleft} \end{quote} Python source code is a kind of text and can be manipulated with the same techniques that are used to manipulate text: \newcounter{listcnt32} \begin{list}{\arabic{listcnt32}.} { \usecounter{listcnt32} \setlength{\rightmargin}{\leftmargin} } \item {} the trivial search and replace; \item {} regular expressions; \item {} state machines; \item {} parsers. \end{list} There is not very much to say about the search and replace method: it is fast, efficient and it works. It should always be used whenever possible. However, in this chapter I will only be interested in cases where something more sophisticated than a plain search and replace is needed: cases that can be managed with regular expressions or with something even more sophisticated than them, such as a state machine or even a full-featured parser. I will \emph{not} give a primer on regular expressions here, since they are already well documented in the standard documentation (see Andrew Kuchling's 'Howto') as well as in many books (for instance 'Mastering Regular Expressions' first edition and 'Python in a Nutshell'). Instead, I will give various practical examples of usage. 
\begin{quote} \begin{verbatim}>>> import re >>> reobj=re.compile(r'x')\end{verbatim} \end{quote} %___________________________________________________________________________ \hypertarget{more-on-metaclasses-and-subclassing-built-in-types}{} \pdfbookmark[1]{More on metaclasses and subclassing built-in types}{more-on-metaclasses-and-subclassing-built-in-types} \subsection*{More on metaclasses and subclassing built-in types} Subclassing \texttt{list} is easy since there are no methods returning lists except the methods correspondings to the '+' and '*' operators. Subclassing \texttt{str} is more complicated, since one has many methods that return strings. Nevertheless, it can be done with the \texttt{AutoWrapped} metaclass, simply by specifying the list of the builtins to be wrapped. \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~Str(str):}\\ \mbox{~~~~{\_}{\_}metaclass{\_}{\_}=AutoWrapped}\\ \mbox{~~~~builtinlist="""{\_}{\_}add{\_}{\_}~{\_}{\_}mod{\_}{\_}~{\_}{\_}mul{\_}{\_}~{\_}{\_}rmod{\_}{\_}~{\_}{\_}rmul{\_}{\_}~capitalize}\\ \mbox{~~~~~~~~center~expandtabs~join~ljust~lower~lstrip~replace~rjust~rstrip~strip}\\ \mbox{~~~~~~~~swapcase~title~translate~upper~zfill""".split()}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Here I show various tests. \begin{quote} % doctest \begin{verbatim}>>> from oopp import Str >>> sum=Str('a')+Str('b') # check the sum >>> print sum, type(sum) ab >>> rprod=Str('a')*2 # check the right product >>> print rprod,type(rprod) aa >>> lprod=2*Str('a') # check the left product >>> print lprod,type(lprod) aa >>> r=Str('a').replace('a','b') # check replace >>> print r,type(r) b >>> r=Str('a').capitalize() # check capitalize >>> print r,type(r) A \end{verbatim} \end{quote} \texttt{Str} acts as a nice base class to built abstractions based on strings. 
In particular, regular expressions can be built on top of strings describing their representation (I remind that if \texttt{x} is a regular expression object, \texttt{x.pattern} is its string representation). Then, the sum of two regular expressions \texttt{x} and \texttt{y} can be defined as the sum of their string representation, \texttt{(x+y).pattern=x.pattern+y.pattern}. Moreover, it is convenient to define the \texttt{{\_}{\_}or{\_}{\_}} method of two regular expression in such a way that \texttt{(x | y).pattern=x.pattern+'|'+y.pattern}. All this can be achieved trough the following class: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~BaseRegexp(Str):}\\ \mbox{}\\ \mbox{~~~~builtinlist=['{\_}{\_}radd{\_}{\_}',~'{\_}{\_}ror{\_}{\_}']}\\ \mbox{~~~~wraplist=['{\_}{\_}add{\_}{\_}','{\_}{\_}or{\_}{\_}']}\\ \mbox{}\\ \mbox{~~~~{\_}{\_}add{\_}{\_}~=~lambda~self,other:~self.pattern~+~other~}\\ \mbox{~~~~{\_}{\_}or{\_}{\_}~~=~lambda~self,other:~self.pattern+'|'+other}\\ \mbox{}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}~(self,regexp):}\\ \mbox{~~~~~~~~"Adds~to~str~methods~the~regexp~methods"}\\ \mbox{~~~~~~~~reobj=re.compile(regexp)}\\ \mbox{~~~~~~~~for~attr~in~dir(reobj)+['pattern']:}\\ \mbox{~~~~~~~~~~~~setattr(self,attr,getattr(reobj,attr))}\\ \mbox{}\\ \mbox{{\#}}\\ \mbox{}\\ \mbox{>>>~from~oopp~import~*}\\ \mbox{>>>~aob=BaseRegexp('a')|BaseRegexp('b');~print~aob}\\ \mbox{a|b}\\ \mbox{>>>~print~pretty(attributes(aob))}\\ \mbox{encode~=~}\\ \mbox{endswith~=~}\\ \mbox{expandtabs~=~}\\ \mbox{find~=~}\\ \mbox{findall~=~}\\ \mbox{finditer~=~}\\ \mbox{index~=~}\\ \mbox{isalnum~=~}\\ \mbox{isalpha~=~}\\ \mbox{isdigit~=~}\\ \mbox{islower~=~}\\ \mbox{isspace~=~}\\ \mbox{istitle~=~}\\ \mbox{isupper~=~}\\ \mbox{join~=~}\\ \mbox{ljust~=~}\\ \mbox{lower~=~}\\ \mbox{lstrip~=~}\\ \mbox{match~=~}\\ \mbox{pattern~=~ba}\\ \mbox{replace~=~}\\ \mbox{rfind~=~}\\ \mbox{rindex~=~}\\ \mbox{rjust~=~}\\ \mbox{rstrip~=~}\\ \mbox{scanner~=~}\\ 
\mbox{search~=~}\\ \mbox{split~=~}\\ \mbox{splitlines~=~}\\ \mbox{startswith~=~}\\ \mbox{strip~=~}\\ \mbox{sub~=~}\\ \mbox{subn~=~}\\ \mbox{swapcase~=~}\\ \mbox{title~=~}\\ \mbox{translate~=~}\\ \mbox{upper~=~}\\ \mbox{wraplist~=~['{\_}{\_}add{\_}{\_}',~'{\_}{\_}radd{\_}{\_}',~'{\_}{\_}or{\_}{\_}',~'{\_}{\_}ror{\_}{\_}']}\\ \mbox{zfill~=~}\\ \mbox{}\\ \mbox{{\#}}\\ \mbox{}\\ \mbox{class~Regexp(BaseRegexp):}\\ \mbox{~~~~class~{\_}{\_}metaclass{\_}{\_}(BaseRegexp.{\_}{\_}metaclass{\_}{\_}):}\\ \mbox{~~~~~~~~def~{\_}{\_}setattr{\_}{\_}(cls,name,value):}\\ \mbox{~~~~~~~~~~~~if~name==name.upper():~{\#}~all~caps~means~regexp~constant}\\ \mbox{~~~~~~~~~~~~~~~~if~not~isinstance(value,cls):~value=cls(value)}\\ \mbox{~~~~~~~~~~~~~~~~value.name=name~{\#}~set~regexp~name}\\ \mbox{~~~~~~~~~~~~BaseRegexp.{\_}{\_}metaclass{\_}{\_}.{\_}{\_}setattr{\_}{\_}(cls,name,value)}\\ \mbox{~~~~~~~~~~~~{\#}~basic~setattr}\\ \mbox{}\\ \mbox{~~~~def~named(self,name=None):}\\ \mbox{~~~~~~~~name=getattr(self,'name',name)}\\ \mbox{~~~~~~~~if~name~is~None:~raise~'Unnamed~regular~expression'}\\ \mbox{~~~~~~~~return~self.{\_}{\_}class{\_}{\_}('(?P<{\%}s>{\%}s)'~{\%}~(name,self.pattern))}\\ \mbox{}\\ \mbox{~~~~generateblocks=generateblocks}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} The magic of \texttt{Regexp.{\_}{\_}metaclass{\_}{\_}} allows to generate a library of regular expressions in an elegant way: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{r=Regexp}\\ \mbox{}\\ \mbox{customize(r,}\\ \mbox{~~~~DOTALL~=r'(?s)'~,~~~~~~{\#}~starts~the~DOTALL~mode;~must~be~at~the~beginning}\\ \mbox{~~~~NAME~~~=r'{\textbackslash}b[a-zA-Z{\_}]{\textbackslash}w*',~{\#}~plain~Python~name}\\ \mbox{~~~~EXTNAME=r'{\textbackslash}b[a-zA-Z{\_}][{\textbackslash}w{\textbackslash}.]*',~{\#}~Python~name~with~or~without~dots}\\ 
\mbox{~~~~DOTNAME=r'{\textbackslash}b[a-zA-Z{\_}]{\textbackslash}w*{\textbackslash}.[{\textbackslash}w{\textbackslash}.]*',{\#}~Python~name~with~(at~least~one)~dots}\\ \mbox{~~~~COMMENT=r"{\#}.*?(?={\textbackslash}n)",~{\#}~Python~comment}\\ \mbox{~~~~QUOTED1="'.+?'",~~~~~~~{\#}~single~quoted~string~'}\\ \mbox{~~~~QUOTED2='".+?"',~~~~~~~{\#}~single~quoted~string~"}\\ \mbox{~~~~TRIPLEQ1="'''.+?'''",~~{\#}~triple~quoted~string~'}\\ \mbox{~~~~TRIPLEQ2='""".+?"""'~~~{\#}~triple~quoted~string~"~}\\ \mbox{~~)}\\ \mbox{}\\ \mbox{r.STRING=r.TRIPLEQ1|r.TRIPLEQ2|r.QUOTED1|r.QUOTED2}\\ \mbox{r.CODESEP=r.DOTALL+r.COMMENT.named()|r.STRING.named()}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} The trick is in the redefinition of \texttt{{\_}{\_}setattr{\_}{\_}}, which magically converts all caps attributes in \texttt{Regexp} objects. The features of \texttt{Regexp} can be tested with the following code: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{"""This~script~looks~at~its~own~source~code~and~extracts~dotted~names,}\\ \mbox{i.e.~names~containing~at~least~one~dot,~such~as~object.attribute~or}\\ \mbox{more~general~one,~such~as~obj.attr.subattr."""}\\ \mbox{}\\ \mbox{{\#}~Notice~that~dotted.names~in~comments~and~literal~strings~are~ignored}\\ \mbox{}\\ \mbox{from~oopp~import~*}\\ \mbox{import~{\_}{\_}main{\_}{\_}}\\ \mbox{}\\ \mbox{text=inspect.getsource({\_}{\_}main{\_}{\_})}\\ \mbox{}\\ \mbox{regexp=Regexp.CODESEP|~Regexp.DOTNAME.named()}\\ \mbox{}\\ \mbox{print~'Using~the~regular~expression',regexp}\\ \mbox{}\\ \mbox{print~"I~have~found~the~following~dotted~names:{\textbackslash}n{\%}s"~{\%}~[}\\ \mbox{~~~~MO.group()~for~MO~in~regexp.finditer(text)~if~MO.lastgroup=='DOTNAME']}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} with output: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{Using~the~regular~expression~(?s)(?P{\#}.*?(?={\textbackslash}n))|(?P}\\ 
\mbox{'''.+?'''|""".+?"""|'.+?'|".+?")|(?P[a-zA-Z{\_}]{\textbackslash}w*{\textbackslash}.[{\textbackslash}w{\textbackslash}.]*)}\\ \mbox{I~have~found~the~following~dotted~names:}\\ \mbox{['inspect.getsource',~'Regexp.CODESEP',~'Regexp.DOTNAME.named',~'MO.group',~}\\ \mbox{~'dotname.finditer',~'MO.lastgroup']} \end{flushleft}\end{ttfamily} \end{quote} Now one can define a good \texttt{CodeStr} class with replacing features Let me consider for instance the solution to the problem discussed in chapter 4, i.e. the definition of a \texttt{TextStr} class able to indent and dedent blocks of text. \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~codeprocess(code,TPO):~{\#}~TPO=text~processing~operator}\\ \mbox{~~~~code=code.replace("{\textbackslash}{\textbackslash}'","{\textbackslash}x01").replace('{\textbackslash}{\textbackslash}"','{\textbackslash}x02')}\\ \mbox{~~~~genblock,out~=~Regexp.CODESEP.generateblocks(code),[]}\\ \mbox{~~~~for~block~in~genblock:}\\ \mbox{~~~~~~~~out.append(TPO(block))}\\ \mbox{~~~~~~~~out.append(genblock.next())}\\ \mbox{~~~~return~''.join(out).replace("{\textbackslash}x01","{\textbackslash}{\textbackslash}'").replace('{\textbackslash}x02','{\textbackslash}{\textbackslash}"')}\\ \mbox{}\\ \mbox{def~quotencode(text):}\\ \mbox{~~~~return~text.replace("{\textbackslash}{\textbackslash}'","{\textbackslash}x01").replace('{\textbackslash}{\textbackslash}"','{\textbackslash}x02')}\\ \mbox{}\\ \mbox{def~quotdecode(text):}\\ \mbox{~~~~return~text.replace("{\textbackslash}x01","{\textbackslash}{\textbackslash}'").replace('{\textbackslash}x02','{\textbackslash}{\textbackslash}"')}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Here is an example of usage: replacing 'Print' with 'print' except in comments and literal strings. 
\begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{from~oopp~import~codeprocess}\\ \mbox{}\\ \mbox{wrongcode=r'''}\\ \mbox{"""Code~processing~example:~replaces~'Print'~with~'print'~except~in}\\ \mbox{comments~and~literal~strings"""}\\ \mbox{Print~"This~program~prints~{\textbackslash}"Hello~World!{\textbackslash}""~{\#}~look~at~this~line!}\\ \mbox{'''}\\ \mbox{}\\ \mbox{fixPrint=lambda~s:~s.replace('Print','print')}\\ \mbox{validcode=codeprocess(wrongcode,fixPrint)}\\ \mbox{}\\ \mbox{print~'Source~code:{\textbackslash}n',validcode}\\ \mbox{print~'Output:{\textbackslash}n';~exec~validcode}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} with output \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{Source~code:}\\ \mbox{}\\ \mbox{"""Code~processing~example:~replaces~'Print'~with~'print'~except~in}\\ \mbox{comments~and~literal~strings"""}\\ \mbox{print~"Prints~{\textbackslash}"Hello~World!{\textbackslash}""~{\#}~look~at~this~line!}\\ \mbox{}\\ \mbox{Output:}\\ \mbox{}\\ \mbox{This~program~prints~"Hello~World!"} \end{flushleft}\end{ttfamily} \end{quote} %___________________________________________________________________________ \hypertarget{a-simple-state-machine}{} \pdfbookmark[1]{A simple state machine}{a-simple-state-machine} \subsection*{A simple state machine} Regular expression, however powerful, are limited in scope since they cannot recognize recursive structures. For instance, they cannot parse parenthesized expression. The simplest way to parse a parenthesized expression is to use a state machine. 
\begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{(?:...)~non-grouping}\\ \mbox{(?P...)~}\\ \mbox{}\\ \mbox{(?=...)~look-ahead~}\\ \mbox{(?!...)~negative}\\ \mbox{(?<=...)~look-behind~}\\ \mbox{(?~Regexp(r'''~..~''')}\\ \mbox{reobj2~~:~R"~..~(?~Regexp(r"""~..~""")}\\ \mbox{string1~:~(?~'''~..~'''}\\ \mbox{string2~:~(?~"""~..~"""}\\ \mbox{"""}\\ \mbox{}\\ \mbox{beg=0;~end=1}\\ \mbox{}\\ \mbox{string1[beg]=r"(?~Regexp(r'''~..~''')}\\ \mbox{reobj2~~:~R"~..~(?~Regexp(r"""~..~""")}\\ \mbox{string1~:~(?~'''~..~'''}\\ \mbox{string2~:~(?~"""~..~"""}\\ \mbox{"""}\\ \mbox{}\\ \mbox{beg={\{}{\}};~end={\{}{\}};~ls=[]}\\ \mbox{for~line~in~decl.splitlines():}\\ \mbox{~~~mode,rest=line.split('~:~')}\\ \mbox{~~~s,r=rest.split('~->~')}\\ \mbox{}\\ \mbox{~beg[mode],end[mode]=s.split('~..~')}\\ \mbox{~ls.append('(?P{\%}s)'~{\%}~(mode,beg[mode]))}\\ \mbox{~ls.append('(?P{\%}s)'~{\%}~(mode,end[mode]))}\\ \mbox{}\\ \mbox{~beg2[mode],end2[mode]=r.split('~..~')}\\ \mbox{~ls.append(beg2[mode])}\\ \mbox{~ls.append(end2[mode])}\\ \mbox{}\\ \mbox{delimiters='({\%}s)'~{\%}~re.compile('|'.join(ls))}\\ \mbox{splitlist=['']+delimiters.split(source)}\\ \mbox{for~delim,text~in~splitlist:}\\ \mbox{~~~~delimiters.match(delim).lastgroup} \end{flushleft}\end{ttfamily} \end{quote} %___________________________________________________________________________ \hypertarget{creating-classes}{} \pdfbookmark[1]{Creating classes}{creating-classes} \subsection*{Creating classes} TODO %___________________________________________________________________________ \hypertarget{modifying-modules}{} \pdfbookmark[1]{Modifying modules}{modifying-modules} \subsection*{Modifying modules} Metaclasses are extremely useful since they allows to change the behaviour of the code without changing the sources. For instance, suppose you have a large library written by others that you want to enhance in some way. 
Typically, it is a bad idea to modify the sources, for many reasons: \begin{itemize} \item {} touching code written by others, you may introduce new bugs; \item {} you may have many scripts that require the original version of the library, not the modified one; \item {} if you change the sources and then you buy the new version of the library, you have to change the sources again! \end{itemize} The solution is to enhance the properties of the library at run time, when the module is imported, by using metaclasses. To show a concrete example, let me consider the case of the module \emph{commands} in the Standard Library. This module is Unix-specific, and cannot be used under Windows. It would be nice to have a metaclass able to enhance the module in such a way that when it is invoked on a Windows platform, Windows-specific replacements of the Unix functions provided in the module are used. However, for the sake of brevity, I will only give a metaclass that displays a nice message in the case we are on a Windows platform, without raising an error (one could easily implement such a behaviour, however). 
\begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{import~oopp,sys,commands}\\ \mbox{}\\ \mbox{class~WindowsAware(type):}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(cls,*args):~}\\ \mbox{~~~~~~~~if~sys.platform=='win32':~}\\ \mbox{~~~~~~~~~~~~for~key,val~in~vars(cls).iteritems():}\\ \mbox{~~~~~~~~~~~~~~~~if~isinstance(val,staticmethod):}\\ \mbox{~~~~~~~~~~~~~~~~~~~~setattr(cls,key,staticmethod(lambda~*args:~}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~"Sorry,~you~are~(or~I~pretend~you~are)~on~Windows,"}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~~~"~you~cannot~use~the~{\%}s.module"~{\%}~cls.{\_}{\_}name{\_}{\_}))}\\ \mbox{~~~~~~~~~~}\\ \mbox{sys.platform="win32"~{\#}just~in~case~you~are~not~on~Windows}\\ \mbox{}\\ \mbox{commands=oopp.ClsFactory[WindowsAware](commands)}\\ \mbox{}\\ \mbox{print~commands.getoutput('date')~{\#}cannot~be~executed~on~Windows}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} The output of this script is \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{Sorry,~you~are~on~Windows,~you~cannot~use~the~commands.module} \end{flushleft}\end{ttfamily} \end{quote} However, if you are on Linux and you comment out the line \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{sys.platform="win32"~} \end{flushleft}\end{ttfamily} \end{quote} you will see that the script works. Notice that the line \texttt{commands=WindowsAware(commands)} actually converts the 'commands' module in a 'commands' class, but since the usage is the same, this will fool all programs using the commands module. In this case the class factory 'WindowsAware' can also be thought as a module modifier. In this sense, it is very useful to denote the metaclass with an \emph{adjective}. 
%___________________________________________________________________________ \hypertarget{metaclasses-and-attribute-descriptors}{} \pdfbookmark[1]{Metaclasses and attribute descriptors}{metaclasses-and-attribute-descriptors} \subsection*{Metaclasses and attribute descriptors} Descriptors are especially useful in conjunction with metaclasses, since a custom metaclass can use them as low-level tools to modify the methods and the attributes of its instances. This allows one to implement very sophisticated features with a few lines of code. Notice, anyway, that even plain old functions can be thought of as descriptors. Descriptors share at least two features with metaclasses: \newcounter{listcnt33} \begin{list}{\arabic{listcnt33}.} { \usecounter{listcnt33} \setlength{\rightmargin}{\leftmargin} } \item {} like metaclasses, descriptors are best used as adjectives, since they are intended to modify and enhance standard methods and attributes, in the same sense metaclasses modify and enhance standard classes; \item {} like metaclasses, descriptors can change the \emph{semantics} of Python, i.e. what you see is not necessarily what you get. As such, they are a dangerous feature. Use them with judgement! \end{list} Now I will show a possible application of properties. Suppose one has a given class with various kinds of attributes (plain methods, regular methods, static methods, class methods, properties and data attributes) and she wants to trace the access to the data attributes (notice that the motivation for the following problem comes from a real question asked in comp.lang.python). Then one needs to retrieve data attributes from the class and convert them into properties controlling their access syntax. 
The first problem is solved by a simple function \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~isplaindata(a):}\\ \mbox{~~~~"""A~data~attribute~has~no~{\_}{\_}get{\_}{\_}~or~{\_}{\_}set{\_}{\_}~attributes,~is~not}\\ \mbox{~~~~a~built-in~function,~nor~a~built-in~method."""~}\\ \mbox{~~~~return~not(hasattr(a,'{\_}{\_}get{\_}{\_}')~or~hasattr(a,'{\_}{\_}set{\_}{\_}')}\\ \mbox{~~~~~~~~~~~~~~~or~isinstance(a,BuiltinMethodType)~or}\\ \mbox{~~~~~~~~~~~~~~~isinstance(a,BuiltinFunctionType))}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} whereas the second problem is elegantly solved by a custom metaclass: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{from~oopp~import~isplaindata,inspect}\\ \mbox{}\\ \mbox{class~TracedAccess(type):}\\ \mbox{~~~~"Metaclass~converting~data~attributes~to~properties"}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(cls,name,bases,dic):}\\ \mbox{~~~~~~~~cls.datadic={\{}{\}}}\\ \mbox{~~~~~~~~for~a~in~dic:}\\ \mbox{~~~~~~~~~~~~if~isplaindata(a):}\\ \mbox{~~~~~~~~~~~~~~~~cls.datadic[a]=dic[a]}\\ \mbox{~~~~~~~~~~~~~~~~def~get(self,a=a):}\\ \mbox{~~~~~~~~~~~~~~~~~~~~v=cls.datadic[a]}\\ \mbox{~~~~~~~~~~~~~~~~~~~~print~"Accessing~{\%}s,~value={\%}s"~{\%}~(a,v)}\\ \mbox{~~~~~~~~~~~~~~~~~~~~return~v}\\ \mbox{~~~~~~~~~~~~~~~~def~set(self,v,a=a):}\\ \mbox{~~~~~~~~~~~~~~~~~~~~print~"Setting~{\%}s,~value={\%}s"~{\%}~(a,v)}\\ \mbox{~~~~~~~~~~~~~~~~~~~~cls.datadic[a]=v}\\ \mbox{~~~~~~~~~~~~~~~~setattr(cls,a,property(get,set))}\\ \mbox{}\\ \mbox{class~C(object):}\\ \mbox{~~~~{\_}{\_}metaclass{\_}{\_}~=~TracedAccess}\\ \mbox{~~~~a1='x'}\\ \mbox{}\\ \mbox{class~D(C):~{\#}~shows~that~the~approach~works~well~with~inheritance}\\ \mbox{~~~~a2='y'}\\ \mbox{}\\ \mbox{i=D()}\\ \mbox{i.a1~{\#}~=>~Accessing~a1,~value=x}\\ \mbox{i.a2~{\#}~=>~Accessing~a2,~value=y}\\ \mbox{i.a1='z'~{\#}~=>~Setting~a1,~value=z}\\ \mbox{i.a1~{\#}~=>~Accessing~a1,~value=z}\\ \mbox{}\\ \mbox{{\#}} 
\end{flushleft}\end{ttfamily} \end{quote} In this example the metaclass looks at the plain data attributes (recognized thanks ot the \texttt{isplaindata} function) of its instances and put them in the dictionary \texttt{cls.datadic}. Then the original attributes are replaced with property objects tracing the access to them. The solution is a 4-line custom metaclass doing the boring job for me: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~Wrapped(Customizable,type):}\\ \mbox{~~~~"""A~customizable~metaclass~to~wrap~methods~with~a~given~wrapper~and}\\ \mbox{~~~~a~given~condition"""}\\ \mbox{~~~~{\_}{\_}metaclass{\_}{\_}=Reflective}\\ \mbox{~~~~wrapper=wrappedmethod}\\ \mbox{~~~~condition=lambda~k,v:~True~{\#}~wrap~all}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(cls,*args):}\\ \mbox{~~~~~~~~super(cls.{\_}{\_}this,cls).{\_}{\_}init{\_}{\_}(*args)}\\ \mbox{~~~~~~~~wrap(cls,cls.wrapper,cls.condition.im{\_}func)}\\ \mbox{}\\ \mbox{Traced=Wrapped.With(wrapper=tracedmethod,{\_}{\_}name{\_}{\_}='Traced')}\\ \mbox{Timed=Wrapped.With(wrapper=timedmethod,{\_}{\_}name{\_}{\_}='Timed')}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Here is an example of usage: \begin{quote} \begin{verbatim}>>> from oopp import * >>> time_=ClsFactory[Traced](time) >>> print time_.asctime() [time_] Calling 'asctime' with arguments (){} ... 
-> 'time_.asctime' called with result: Sun May 4 07:30:51 2003 Sun May 4 07:30:51 2003\end{verbatim} \end{quote} Another is \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{from~oopp~import~ClsFactory,Traced,Reflective}\\ \mbox{}\\ \mbox{def~f1(x):~return~x~~~~~{\#}~nested~functions~}\\ \mbox{def~f2(x):~return~f1(x)~{\#}~we~want~to~trace}\\ \mbox{}\\ \mbox{f1orf2=lambda~k,v~:~v~is~f1~or~v~is~f2}\\ \mbox{make=ClsFactory[Reflective,Traced.With(condition=f1orf2)]}\\ \mbox{traced=make('traced',globals())}\\ \mbox{}\\ \mbox{traced.f2('hello!')~{\#}~call~traced.f2}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} with output \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{[{\_}{\_}main{\_}{\_}]~Calling~'f2'~with~arguments}\\ \mbox{('hello!',){\{}{\}}~...}\\ \mbox{[{\_}{\_}main{\_}{\_}]~Calling~'f1'~with~arguments}\\ \mbox{('hello!',){\{}{\}}~...}\\ \mbox{->~'{\_}{\_}main{\_}{\_}.f1'~called~with~result:~hello!}\\ \mbox{->~'{\_}{\_}main{\_}{\_}.f2'~called~with~result:~hello!} \end{flushleft}\end{ttfamily} \end{quote} %___________________________________________________________________________ \hypertarget{id46}{} \pdfbookmark[1]{Modifying hierarchies}{id46} \subsection*{Modifying hierarchies} Suppose one wants to enhance a pre-existing class, for instance by adding tracing capabilities to it. The problem is non-trivial since it is not enough to derive a new class from the original class using the 'Traced' metaclass. For instance, we could imagine of tracing the 'Pizza' class introduced in chapter 4 by defining \begin{quote} \begin{verbatim}>>> from oopp import * >>> class TracedTomatoPizza(GenericPizza,WithLogger): ... __metaclass__=ClsFactory[Traced] ... toppinglist=['tomato']\end{verbatim} \end{quote} However, this would only trace the methods of the newly defined class, not of the original one. 
Since the new class does not introduce any non-trivial method, the addition of 'Traced' is practically without any effect: \begin{quote} \begin{verbatim}>>> marinara=TracedTomatoPizza('small') # nothing happens ***************************************************************************** Tue Apr 15 11:00:17 2003 1. Created small pizza with tomato, cost $ 1.5\end{verbatim} \end{quote} %___________________________________________________________________________ \hypertarget{tracing-hierarchies}{} \pdfbookmark[1]{Tracing hierarchies}{tracing-hierarchies} \subsection*{Tracing hierarchies} \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{from~oopp~import~*}\\ \mbox{}\\ \mbox{def~wrapMRO(cls,wrapped):}\\ \mbox{~~~~for~c~in~cls.{\_}{\_}mro{\_}{\_}[:-1]:}\\ \mbox{~~~~~~~~wrap(c,wrapped)}\\ \mbox{}\\ \mbox{tracing=tracedmethod.With(logfile=file('trace.txt','w'))}\\ \mbox{wrapMRO(HomoSapiensSapiens,tracing)}\\ \mbox{HomoSapiensSapiens().can()}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} with output in trace.txt \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{[HomoSapiensSapiens]~Calling~'can'~with~arguments}\\ \mbox{~(,){\{}{\}}~...}\\ \mbox{~~~~[HomoSapiens]~Calling~'can'~with~arguments}\\ \mbox{~~~~~(,){\{}{\}}~...}\\ \mbox{~~~~~~~~[HomoHabilis]~Calling~'can'~with~arguments}\\ \mbox{~~~~~~~~~(,){\{}{\}}~...}\\ \mbox{~~~~~~~~~~~~[Homo]~Calling~'can'~with~arguments}\\ \mbox{~~~~~~~~~~~~~(,){\{}{\}}~...}\\ \mbox{~~~~~~~~~~~~~~~~[PrettyPrinted]~Calling~'{\_}{\_}str{\_}{\_}'~with~arguments}\\ \mbox{~~~~~~~~~~~~~~~~~(,){\{}{\}}~...}\\ \mbox{~~~~~~~~~~~~~~~~[PrettyPrinted.{\_}{\_}str{\_}{\_}]~called~with~result:~}\\ \mbox{~~~~~~~~~~~~~~~~~}\\ \mbox{~~~~~~~~~~~~[Homo.can]~called~with~result:~None}\\ \mbox{~~~~~~~~[HomoHabilis.can]~called~with~result:~None}\\ \mbox{~~~~[HomoSapiens.can]~called~with~result:~None}\\ \mbox{[HomoSapiensSapiens.can]~called~with~result:~None} \end{flushleft}\end{ttfamily} \end{quote} 
%___________________________________________________________________________
\hypertarget{modifying-source-code}{} \pdfbookmark[1]{Modifying source code}{modifying-source-code} \subsection*{Modifying source code} The real solution would be to derive the original class 'GenericPizza' from 'Traced' and not from 'object'. One could imagine creating a new class inheriting from 'Traced' and with all the methods of the original 'GenericPizza' class; then one should create copies of all the classes in the whole multiple inheritance hierarchy. This would be a little annoying, but feasible; the real problem is that this approach would not work with cooperative methods, since cooperative calls in the derived classes would invoke methods in the original classes, which are not traced. This is a case where the modification of the original source code is much more appealing and simpler than any other method: it is enough to perform a search and replace in the original source code, by adding the metaclass 'Traced', to enhance the whole multiple inheritance hierarchy. Let me assume that the hierarchy is contained in a module (which is the typical case). The idea is to generate \emph{dynamically} a new module from the modified source code, with a suitable name to avoid conflicts with the original module.
Incredibly enough, this can be done in a few lines: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~modulesub(s,r,module):}\\ \mbox{~~~~"Requires~2.3"}\\ \mbox{~~~~name=module.{\_}{\_}name{\_}{\_}}\\ \mbox{~~~~source=inspect.getsource(module).replace(s,r)}\\ \mbox{~~~~dic={\{}name:~module{\}};~exec~source~in~dic~{\#}~exec~the~modified~module}\\ \mbox{~~~~module2=ModuleType(name+'2')~{\#}~creates~an~an~empty~module~}\\ \mbox{~~~~customize(module2,**dic)~{\#}~populates~it~with~dic}\\ \mbox{~~~~return~module2}\\ \mbox{~~}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Notice that the \texttt{modulesub} function, which modifies the source code of a given module and returns a modified module, requires Python 2.3 to work. This is due to a subtle bug in \texttt{exec} in Python 2.2. Anyway, the restriction to Python 2.3 allows me to take advantage of one of the most elegant conveniences of Python 2.3: the names in the \texttt{types} module act as type factories and in particular \texttt{ModuleType(s)} returns an (empty) module named \texttt{s}. Here is an example of usage: \begin{quote} \begin{verbatim}>>> import oopp >>> s='GenericPizza(object):' >>> oopp2=oopp.modulesub(s,s+'\n __metaclass__=oopp.Traced',oopp) \end{verbatim} \end{quote} Name clashes are avoided, since 'oopp2' is a different module from 'oopp'; we simultaneously have access to both the original hierarchy in 'oopp' (non-traced) and the modified one in 'oopp2' (traced). In particular 'oopp2.CustomizablePizza' is traced and therefore \begin{quote} \begin{verbatim}>>> class PizzaLog(oopp2.CustomizablePizza,oopp2.WithLogger): ... 
__metaclass__=makecls() >>> marinara=PizzaLog.With(toppinglist=['tomato'])('small')\end{verbatim} \end{quote} gives the output \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{[PizzaLog]~Calling~'{\_}{\_}init{\_}{\_}'~with~arguments}\\ \mbox{(,~'small'){\{}{\}}~...}\\ \mbox{->~'PizzaLog.{\_}{\_}init{\_}{\_}'~called~with~result:~None}\\ \mbox{}\\ \mbox{*****************************************************************************}\\ \mbox{Thu~Mar~27~09:18:28~2003}\\ \mbox{[PizzaLog]~Calling~'{\_}{\_}str{\_}{\_}'~with~arguments}\\ \mbox{(,){\{}{\}}~...}\\ \mbox{[PizzaLog]~Calling~'price'~with~arguments}\\ \mbox{(,){\{}{\}}~...}\\ \mbox{[PizzaLog]~Calling~'toppings{\_}price'~with~arguments}\\ \mbox{(,){\{}{\}}~...}\\ \mbox{->~'PizzaLog.toppings{\_}price'~called~with~result:~0.5}\\ \mbox{}\\ \mbox{->~'PizzaLog.price'~called~with~result:~1.5}\\ \mbox{}\\ \mbox{->~'PizzaLog.{\_}{\_}str{\_}{\_}'~called~with~result:~small~pizza~with~tomato,~cost~{\$}~1.5}\\ \mbox{}\\ \mbox{1.~Created~small~pizza~with~tomato,~cost~{\$}~1.5} \end{flushleft}\end{ttfamily} \end{quote} From that we understand what is happening: \begin{itemize} \item {} \texttt{PizzaLog.{\_}{\_}init{\_}{\_}} calls \texttt{GenericPizza.{\_}{\_}init{\_}{\_}} that defines size and cooperatively calls \texttt{WithLogger.{\_}{\_}init{\_}{\_}} \item {} WithLogger.{\_}{\_}init{\_}{\_} cooperatively calls \texttt{WithCounter.{\_}{\_}init{\_}{\_}} that increments the count attribute; \item {} at this point, the instruction 'print self' in \texttt{WithLogger.{\_}{\_}init{\_}{\_}} calls \texttt{PizzaLog.{\_}{\_}str{\_}{\_}} (inherited from \texttt{GenericPizza.{\_}{\_}str{\_}{\_}}); \item {} \texttt{GenericPizza.{\_}{\_}str{\_}{\_}} calls 'price' that in turns calls 'toppings{\_}price'. \end{itemize} On top of that, notice that the metaclass of 'PizzaLog' is \texttt{{\_}TracedReflective} that has been automagically generated by \texttt{makecls} from the metaclasses of 'CustomizablePizza' (i.e. 
'Traced') and of 'WithLogger' (i.e. 'Reflective'); the leading underscore helps one to understand the dynamic origin of '{\_}TracedReflective'. It turns out that '{\_}TracedReflective' has a dynamically generated (meta-meta)class: \begin{quote} \begin{verbatim}>>> print type(type(PizzaLog)) #meta-metaclass \end{verbatim} \end{quote} Therefore this example has a non-trivial class hierarchy \begin{quote} \begin{verbatim}>>> print oopp.MRO(PizzaLog) MRO of PizzaLog: 0 - PizzaLog(CustomizablePizza,WithLogger)[Traced] 1 - CustomizablePizza(GenericPizza,Customizable)[Traced] 2 - GenericPizza(object)[Traced] 3 - WithLogger(WithCounter,Customizable,PrettyPrinted) 4 - WithCounter(object) 5 - Customizable(object) 6 - PrettyPrinted(object) 7 - object()\end{verbatim} \end{quote} a non-trivial metaclass hierarchy, \begin{quote} \begin{verbatim}>>> print oopp.MRO(type(PizzaLog)) # the metaclass hierarchy MRO of Traced: 0 - Traced(Reflective)[WithWrappingCapabilities] 1 - Reflective(type) 2 - type(object) 3 - object()\end{verbatim} \end{quote} and a non-trivial meta-metaclass hierarchy: \begin{quote} \begin{verbatim}>>> print oopp.MRO(type(type(PizzaLog))) # the meta-metaclass hierarchy MRO of WithWrappingCapabilities: 0 - WithWrappingCapabilities(BracketCallable) 1 - CallableWithBrackets(type) 2 - type(object) 3 - object()\end{verbatim} \end{quote} Pretty complicated, isn't it? ;) This example is there to show what kind of maintenance problems one can have with programs making heavy use of metaclasses, particularly when they must be understood by somebody other than the author \dots
%___________________________________________________________________________ \hypertarget{metaclass-regenerated-hierarchies}{} \pdfbookmark[1]{Metaclass regenerated hierarchies}{metaclass-regenerated-hierarchies} \subsection*{Metaclass regenerated hierarchies} \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{import~types}\\ \mbox{~~}\\ \mbox{def~hierarchy(self,cls):}\\ \mbox{~~~~d=dict([(t.{\_}{\_}name{\_}{\_},t)~for~t~in~vars(types).itervalues()}\\ \mbox{~~~~~~~~~~~~if~isinstance(t,type)])}\\ \mbox{~~~~def~new(c):}\\ \mbox{~~~~~~~~bases=tuple([d[b.{\_}{\_}name{\_}{\_}]~for~b~in~c.{\_}{\_}bases{\_}{\_}])}\\ \mbox{~~~~~~~~return~self(c.{\_}{\_}name{\_}{\_},~bases,~c.{\_}{\_}dict{\_}{\_}.copy())}\\ \mbox{~~~~mro=list(cls.{\_}{\_}mro{\_}{\_}[:-1])}\\ \mbox{~~~~mro.reverse()}\\ \mbox{~~~~for~c~in~mro:}\\ \mbox{~~~~~~~~if~not~c.{\_}{\_}name{\_}{\_}~in~d:}\\ \mbox{~~~~~~~~~~~~d[c.{\_}{\_}name{\_}{\_}]=new(c)}\\ \mbox{~~~~customize(self,**d)}\\ \mbox{}\\ \mbox{ClsFactory.hierarchy=hierarchy}\\ \mbox{traced=ClsFactory[Traced,Reflective]} \end{flushleft}\end{ttfamily} \end{quote} Unfortunately, this approach does not work if the original hierarchy makes named cooperative super calls. Therefore the source-code run-time modification has its advantages. %___________________________________________________________________________ \hypertarget{the-programmable-programming-language}{} \pdfbookmark[0]{THE PROGRAMMABLE PROGRAMMING LANGUAGE}{the-programmable-programming-language} \section*{THE PROGRAMMABLE PROGRAMMING LANGUAGE} \begin{quote} \emph{I think that lisp is a better applications language than Python. However, Python is close enough, or at least so much better than the alternatives, that Python's social and glue language advantages are often decisive.} -- Andy Freeman on c.l.p. \end{quote} I go in \emph{really} DEEP BLACK MAGIC here. 
Lisp has been called the \emph{programmable programming language} [\hyperlink{id43}{22}] since its macros allow the programmer to change the \emph{syntax} of the language. Python has no macros and the syntax of the language cannot be changed. Nevertheless, Python metaclasses allow one to change the \emph{semantics} of the language. In this sense, they are even more powerful and more dangerous than Lisp macros. Python metaclasses allow the user to customize the language (if not its syntax). This is cool enough; however, it can make your programs unreadable by others. The techniques explained in this chapter should be used with care. Nevertheless, I trust the judgement of the programmer who has been able to reach this chapter, and I don't mind providing him/her with further rope to shoot himself/herself in the foot ;) \begin{figure}[b]\hypertarget{id48}[24] Paul Graham, 'OnLisp' citing \end{figure}
%___________________________________________________________________________
\hypertarget{enhancing-the-python-language}{} \pdfbookmark[1]{Enhancing the Python language}{enhancing-the-python-language} \subsection*{Enhancing the Python language} Let me start with some minor usage of metaclasses. In this section I will show how the user can implement in a few lines features that are built into other languages, through a minimal usage of metaclasses. For instance, suppose one wants to define a class which cannot be derived: in Java this can be done with the ``final'' keyword.
In Python there is no need to add a new keyword to the language: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~NonDerivableError(Exception):~pass}\\ \mbox{}\\ \mbox{class~Final(type):~{\#}~better~derived~from~WithCounter,type}\\ \mbox{~~~~"Instances~of~Final~cannot~be~derived"}\\ \mbox{~~~~def~{\_}{\_}new{\_}{\_}(meta,name,bases,dic):}\\ \mbox{~~~~~~~~try:}\\ \mbox{~~~~~~~~~~~~meta.already{\_}called~is~True}\\ \mbox{~~~~~~~~except~AttributeError:~{\#}~not~already~called}\\ \mbox{~~~~~~~~~~~~meta.already{\_}called=True}\\ \mbox{~~~~~~~~~~~~return~super(Final,meta).{\_}{\_}new{\_}{\_}(meta,name,bases,dic)}\\ \mbox{~~~~~~~~else:~{\#}if~already~called}\\ \mbox{~~~~~~~~~~~~raise~NonDerivableError("I~cannot~derive~from~{\%}s"~{\%}~bases)}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} Here there is an example of usage: \begin{quote} \begin{verbatim}>>> from oopp import Final >>> class C: ... __metaclass__=Final ... >>> class D(C): pass #error ... NonDerivableError: D not created from (,)\end{verbatim} \end{quote} It is interesting to notice that a similar effect can be reached with a \texttt{singletonClass} class factory: a 'MetaSingleton' inherits from \texttt{Singleton} and from 'type' (therefore it is a metaclass): \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{class~S(Singleton,type):~pass}\\ \mbox{singletonClass=ClsFactory[S]}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} If we write \begin{quote} \begin{verbatim}>>> from oopp import singletonClass >>> C=singletonClass() >>> class D(C): ... 
pass\end{verbatim} \end{quote} we see that actually 'D' is not a new instance of 'Singleton', but it coincides with 'C', instead: \begin{quote} \begin{verbatim}>>> id(C),id(D) (135622140, 135622140) >>> C is D True >>> type(C) >>> type(C).__bases__ (, ) >>> c=C(); d=D() >>> id(c),id(d) (1075378028, 1075378924)\end{verbatim} \end{quote} Notice the order: 'SingletonClass' must inherit from 'Singleton' first and from \texttt{Class} second, otherwise the \texttt{Class.{\_}{\_}new{\_}{\_}} method would override the \texttt{Singleton.{\_}{\_}new{\_}{\_}}, therefore losing the 'Singleton' basic property of having only one instance. On the other hand, in the correct order, 'Singleton' first and 'Class' second, the inheritance diagram is \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{~~~~~~~~~~~~~~~object~~~5}\\ \mbox{~~~~~~~~~~~~~({\_}{\_}new{\_}{\_})}\\ \mbox{~~~~~~~~~~~~/~~~~~~~~~~{\textbackslash}}\\ \mbox{~~~~~~~~~~~/~~~~~~~~~~~~{\textbackslash}}\\ \mbox{2~~~~~~WithCounter~~~~~~~~~~type~~~~4}\\ \mbox{~~~~~~({\_}{\_}new{\_}{\_})~~~~~~~({\_}{\_}new{\_}{\_})}\\ \mbox{~~~~~~~~~~|~~~~~~~~~~~~~~|}\\ \mbox{~~~~~~~~~~|~~~~~~~~~~~~~~|}\\ \mbox{1~~~~~Singleton~~~~~~~~~Class~~~~3}\\ \mbox{~~~~~~({\_}{\_}new{\_}{\_})~~~~~~~({\_}{\_}new{\_}{\_})}\\ \mbox{~~~~~~~~~~~{\textbackslash}~~~~~~~~~~~~~/}\\ \mbox{~~~~~~~~~~~~{\textbackslash}~~~~~~~~~~~/}\\ \mbox{~~~~~~~~~~~~SingletonClass~~~~0}\\ \mbox{~~~~~~~~~(Singleton.{\_}{\_}new{\_}{\_})} \end{flushleft}\end{ttfamily} \begin{ttfamily}\begin{flushleft} \mbox{~~~~~~object}\\ \mbox{~~~~~/~~~~~{\textbackslash}}\\ \mbox{~~~~/~~~~~~~|}\\ \mbox{WithCounter~~~~~|~}\\ \mbox{~~~~|~~~~~~~|}\\ \mbox{Singleton~~type}\\ \mbox{~~~~~{\textbackslash}~~~~~/}\\ \mbox{~~~~~~{\textbackslash}~~~/}\\ \mbox{~~~MetaSingleton}\\ \mbox{~~~~~~~~:}\\ \mbox{~~~~~~~~:~~~~~~~}\\ \mbox{~~~~~~~~:~~~instantiation}\\ \mbox{~~~~~~~~:}\\ \mbox{~~~~~~~~:}\\ \mbox{~~~~~~C~=~D} \end{flushleft}\end{ttfamily} \end{quote} whereas 'SingletonClass' inherits 
\texttt{Singleton.{\_}{\_}new{\_}{\_}} which, through the \texttt{super} mechanism, calls 'type.{\_}{\_}new{\_}{\_}' and therefore creates the class 'C'. Notice that class 'D' is never created: it is simply an alias for 'C'. I think it is simpler to write down the class 'Final' explicitly (explicit is better than implicit) as I did; however a fanatic of code reuse could derive it from 'SingletonClass': \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{from~oopp~import~*}\\ \mbox{}\\ \mbox{class~Final(Singleton,type):}\\ \mbox{~~~~"Inherits~the~'instance'~attribute~from~Singleton~(default~None)"}\\ \mbox{~~~~def~{\_}{\_}new{\_}{\_}(meta,name,bases,dic):}\\ \mbox{~~~~~~~~if~meta.counter==0:~{\#}~first~call}\\ \mbox{~~~~~~~~~~~~return~super(Final,meta).{\_}{\_}new{\_}{\_}(meta,name,bases,dic)}\\ \mbox{~~~~~~~~else:}\\ \mbox{~~~~~~~~~~~~raise~NonDerivableError("I~cannot~derive~from~{\%}s"~{\%}~bases)}\\ \mbox{~~}\\ \mbox{class~C:~~{\_}{\_}metaclass{\_}{\_}=Final}\\ \mbox{}\\ \mbox{try:}\\ \mbox{~~~~class~D(C):~pass}\\ \mbox{except~NonDerivableError,e:}\\ \mbox{~~~~print~e}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} The reader can check that this script has the correct output ``I cannot derive from {\textless}class 'oopp.C'{\textgreater}''. I leave it to the reader to understand the issues with trying to implement 'NonDerivable' from 'NonInstantiable'. {\#}And why an inner metaclass would not work.
%___________________________________________________________________________ \hypertarget{restricting-python-dynamism}{} \pdfbookmark[1]{Restricting Python dynamism}{restricting-python-dynamism} \subsection*{Restricting Python dynamism} \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{def~frozen(self,name,value):}\\ \mbox{~~~~if~hasattr(self,name):}\\ \mbox{~~~~~~~~type(self).{\_}{\_}bases{\_}{\_}[0].{\_}{\_}setattr{\_}{\_}(self,name,value)~}\\ \mbox{~~~~else:}\\ \mbox{~~~~~~~~raise~AttributeError("You~cannot~add~attributes~to~{\%}s"~{\%}~self)}\\ \mbox{}\\ \mbox{class~Frozen(object):}\\ \mbox{~~~~"""Subclasses~of~Frozen~are~frozen,~i.e.~it~is~impossibile~to~add}\\ \mbox{~~~~~new~attributes~to~them~and~their~instances"""}\\ \mbox{~~~~{\_}{\_}setattr{\_}{\_}~=~frozen}\\ \mbox{~~~~class~{\_}{\_}metaclass{\_}{\_}(type):}\\ \mbox{~~~~~~~~{\_}{\_}setattr{\_}{\_}~=~frozen}\\ \mbox{}\\ \mbox{{\#}}\\ \mbox{}\\ \mbox{}\\ \mbox{{\#}}\\ \mbox{}\\ \mbox{from~oopp~import~*}\\ \mbox{}\\ \mbox{class~C(Frozen):}\\ \mbox{~~~~c=1}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(self):~}\\ \mbox{~~~~~~~~{\#}self.x=5~{\#}~won't~work~anymore,~{\_}{\_}new{\_}{\_}~will~be~okay}\\ \mbox{~~~~~~~~pass}\\ \mbox{}\\ \mbox{class~D(C):}\\ \mbox{~~~~d=2}\\ \mbox{~~}\\ \mbox{C.c=2}\\ \mbox{}\\ \mbox{print~D().d}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} %___________________________________________________________________________ \hypertarget{changing-the-language-without-changing-the-language}{} \pdfbookmark[1]{Changing the language without changing the language}{changing-the-language-without-changing-the-language} \subsection*{Changing the language without changing the language} In Lisp the user has the possibility of changing the syntax of the language to suit her purposes (or simply to fit her taste). In Python, the user cannot change the basic grammar of the language, nevertheless, to a great extent, metaclasses allows to emulate this effect. 
Notice that using metaclasses to this aim is not necessarily a good idea, since once you start changing the Python standard behaviour, it will become impossible for others to understand your programs (which is what happened to Lisp ;). Let me show how metaclasses can be used to provide notational convenience (i.e.\ syntactic sugar) for Python. As a first example, I will show how we may use metaclasses to provide some convenient notation for staticmethods and classmethods: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{class~MetaSugar(type):}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(cls,name,bases,clsdict):}\\ \mbox{~~~~~~~~for~key,value~in~clsdict.iteritems():}\\ \mbox{~~~~~~~~~~~~if~key.startswith("static{\_}"):}\\ \mbox{~~~~~~~~~~~~~~~~setattr(cls,key[7:],staticmethod(value))}\\ \mbox{~~~~~~~~~~~~elif~key.startswith("class{\_}"):}\\ \mbox{~~~~~~~~~~~~~~~~setattr(cls,key[6:],classmethod(value))} \end{flushleft}\end{ttfamily} \end{quote} The same effect can be obtained through normal inheritance: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{class~SyntacticSugar(object):}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(self):}\\ \mbox{~~~~~~~~for~k,v~in~self.{\_}{\_}class{\_}{\_}.{\_}{\_}dict{\_}{\_}.iteritems():}\\ \mbox{~~~~~~~~~~~~if~k.startswith('static{\_}'):}\\ \mbox{~~~~~~~~~~~~~~~~self.{\_}{\_}class{\_}{\_}.{\_}{\_}dict{\_}{\_}[k[7:]]~=~staticmethod(v)}\\ \mbox{~~~~~~~~~~~~if~k.startswith('static{\_}'):}\\ \mbox{~~~~~~~~~~~~~~~~self.{\_}{\_}class{\_}{\_}.{\_}{\_}dict{\_}{\_}[k[7:]]~=~staticmethod(v)} \end{flushleft}\end{ttfamily} \end{quote} Let me now implement some syntactic sugar for the {\_}{\_}metaclass{\_}{\_} hook.
\begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}}\\ \mbox{}\\ \mbox{import~re}\\ \mbox{squarednames=re.compile('{\textbackslash}[([A-Za-z{\_}][{\textbackslash}w{\textbackslash}.,~]*){\textbackslash}]')}\\ \mbox{}\\ \mbox{def~inferredfromdocstring(name,bases,dic):}\\ \mbox{~~~~docstring=dic['{\_}{\_}doc{\_}{\_}']}\\ \mbox{~~~~match=squarednames.match(docstring)}\\ \mbox{~~~~if~not~match:~return~ClsFactory[Reflective](name,bases,dic)}\\ \mbox{~~~~metanames=[name.strip()~for~name~in~match.group(1).split(',')]}\\ \mbox{~~~~metaname=''.join(metanames)~~}\\ \mbox{~~~~if~len(metanames)>1:~{\#}~creates~a~new~metaclass}\\ \mbox{~~~~~~~~metaclass=type(metaname,tuple(map(eval,metanames)),{\{}{\}})}\\ \mbox{~~~~else:}\\ \mbox{~~~~~~~~metaclass=eval(metaname)}\\ \mbox{~~~~return~ClsFactory[metaclass](name,bases,dic)}\\ \mbox{}\\ \mbox{{\#}}\\ \mbox{}\\ \mbox{{\#}}\\ \mbox{}\\ \mbox{from~oopp~import~*}\\ \mbox{{\_}{\_}metaclass{\_}{\_}~=~inferredfromdocstring}\\ \mbox{class~B:}\\ \mbox{~~~~"Do~nothing~class"}\\ \mbox{}\\ \mbox{class~C:~}\\ \mbox{~~~~"[Reflective]"}\\ \mbox{~~~~"~Do~nothing~class"}\\ \mbox{}\\ \mbox{class~D:}\\ \mbox{~~~~"[WithLogger,Final]"}\\ \mbox{~~~~"Do~nothing~class"}\\ \mbox{}\\ \mbox{class~E(C):}\\ \mbox{~~~~pass}\\ \mbox{}\\ \mbox{{\#}} \end{flushleft}\end{ttfamily} \end{quote} With output: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{*****************************************************************************}\\ \mbox{Fri~Feb~21~09:35:58~2003}\\ \mbox{Creating~class~Logged{\_}C~descending~from~(),}\\ \mbox{instance~of~}\\ \mbox{}\\ \mbox{Logged{\_}C~dictionary:}\\ \mbox{~{\_}{\_}doc{\_}{\_}~=~Do~nothing~class}\\ \mbox{*****************************************************************************}\\ \mbox{Fri~Feb~21~09:35:58~2003}\\ \mbox{Creating~class~Logged{\_}Final{\_}D~descending~from~(),}\\ \mbox{instance~of~}\\ \mbox{}\\ \mbox{Logged{\_}Final{\_}D~dictionary:}\\ \mbox{{\_}{\_}doc{\_}{\_}~=~Do~nothing~class}\\ 
\mbox{*****************************************************************************}\\ \mbox{Fri~Feb~21~09:35:58~2003}\\ \mbox{Creating~class~E~descending~from~(,),}\\ \mbox{instance~of~}\\ \mbox{}\\ \mbox{E~dictionary:}\\ \mbox{~} \end{flushleft}\end{ttfamily} \end{quote} To conclude, let me point out a few observations: Metaclasses can be used to provide syntactic sugar, as I have shown in the previous example. However, I have given the previous routines as a proof of concept: I do \emph{not} use these routines in my actual code for many good reasons: \newcounter{listcnt34} \begin{list}{\arabic{listcnt34}.} { \usecounter{listcnt34} \setlength{\rightmargin}{\leftmargin} } \item {} In the end, a convenient notation will be provided in Python 2.4; \item {} I don't want to use magic tricks in my code: I want others to be able to understand what the code is doing; \item {} I want to be able to understand my own code myself six months from today ;) \end{list} Anyway, I think it is a good thing to know about this potential of metaclasses, which can turn out to be very convenient in certain applications: but this does not mean that it should be blindly used and/or abused. In other words: with great powers come great responsibilities ;)
%___________________________________________________________________________
\hypertarget{recognizing-magic-comments}{} \pdfbookmark[1]{Recognizing magic comments}{recognizing-magic-comments} \subsection*{Recognizing magic comments} In this section, I will begin to unravel the secrets of the black magic art of changing Python semantics and I will show that with a few lines involving metaclasses and the standard library 'inspect' module, even comments can be made significant! (Let me continue with my series ``how to do what should not be done''.) To this aim, I need a brief digression on regular expressions.
\begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{class~RecognizesMagicComments(object):}\\ \mbox{~~~form=r'def~{\%}s(NAME)(args):{\#}!{\textbackslash}s?staticmethod'}\\ \mbox{~~~class~{\_}{\_}metaclass{\_}{\_}(type):}\\ \mbox{~~~~~~~def~{\_}{\_}new{\_}{\_}(meta,name,bases,dic):}\\ \mbox{~~~~~~~~~~~code=[]}\\ \mbox{~~~~~~~~~~~for~attr~in~dic:}\\ \mbox{~~~~~~~~~~~~~~~source=inspect.getsource(dic[attr]).splitlines()}\\ \mbox{~~~~~~~~~~~~~~~for~line~in~source:}\\ \mbox{~~~~~~~~~~~~~~~~~~~split=line.split('{\#}!')}\\ \mbox{~~~~~~~~~~~~~~~~~~~if~len(split)==2:}\\ \mbox{~~~~~~~~~~~~~~~~~~~~~~~descriptor=split[1];~code.append(split[0])}\\ \mbox{~~~~~~~~~~~~~~~~~~~else:~code.append(line)}\\ \mbox{~~~~~~~~~~~~~}\\ \mbox{class~C(RecognizesMagicComments):}\\ \mbox{~~~~{\#}!staticmethod}\\ \mbox{~~~~def~f(x):~{\#}!staticmethod}\\ \mbox{~~~~~~~~return~x} \end{flushleft}\end{ttfamily} \end{quote} %___________________________________________________________________________ \hypertarget{interpreting-python-source-code-on-the-fly}{} \pdfbookmark[1]{Interpreting Python source code on the fly}{interpreting-python-source-code-on-the-fly} \subsection*{Interpreting Python source code on the fly} At this point, I can really go \emph{DEEP} in black magic. 
\begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{import~sys,~inspect,~linecache,~re}\\ \mbox{}\\ \mbox{def~cls{\_}source(name,module):}\\ \mbox{~~~~lines~=~linecache.getlines(inspect.getsourcefile(module))}\\ \mbox{~~~~if~not~lines:~raise~IOError,~'could~not~get~source~code'}\\ \mbox{~~~~pat~=~re.compile(r'{\textasciicircum}{\textbackslash}s*class{\textbackslash}s*'~+~name~+~r'{\textbackslash}b')}\\ \mbox{~~~~for~i~in~range(len(lines)):}\\ \mbox{~~~~~~~~if~pat.match(lines[i]):~break}\\ \mbox{~~~~else:~raise~IOError,~'could~not~find~class~definition'}\\ \mbox{~~~~lines,~lnum~=~inspect.getblock(lines[i:]),~i~+~1}\\ \mbox{~~~~return~''.join(lines)}\\ \mbox{}\\ \mbox{class~Interpreter(object):}\\ \mbox{~~~~def~{\_}{\_}init{\_}{\_}(self,CPO):~{\#}~possible~composition~of~code~processing~opers}\\ \mbox{~~~~~~~~self.repl=CPO}\\ \mbox{~~~~def~{\_}{\_}call{\_}{\_}(self,name,bases,dic):}\\ \mbox{~~~~~~~~try:}\\ \mbox{~~~~~~~~~~~modulename=dic['{\_}{\_}module{\_}{\_}']~{\#}~module~where~the~class~is~defined}\\ \mbox{~~~~~~~~except~KeyError:~{\#}~no~{\_}{\_}module{\_}{\_}~attribute}\\ \mbox{~~~~~~~~~~~raise~IOError("Class~{\%}s~cannot~be~defined~dynamically~or~in~the{\textbackslash}n"}\\ \mbox{~~~~~~~~~~~"interpreter~and~the~source~code~cannot~came~from~a~pipe"{\%}~name)}\\ \mbox{~~~~~~~~module=sys.modules[modulename]~}\\ \mbox{~~~~~~~~source=self.repl(cls{\_}source(name,module))}\\ \mbox{~~~~~~~~source=re.sub('{\_}{\_}metaclass{\_}{\_}=.*','{\_}{\_}metaclass{\_}{\_}=type',source)}\\ \mbox{~~~~~~~~{\#}print~source}\\ \mbox{~~~~~~~~loc={\{}{\}};~exec~source~in~vars(module),loc}\\ \mbox{~~~~~~~~return~loc[name]}\\ \mbox{}\\ \mbox{regexp{\_}expand=Interpreter(regexp)} \end{flushleft}\end{ttfamily} \end{quote} %___________________________________________________________________________ \hypertarget{implementing-lazy-evaluation}{} \pdfbookmark[1]{Implementing lazy evaluation}{implementing-lazy-evaluation} \subsection*{Implementing lazy evaluation} At this point of our 
knowledge, it becomes trivial to implement lazy evaluation and then a ternary operator. (My original, simpler, implementation is posted on c.l.p.; see the thread 'PEP 312 (and thus 308) implemented with a black magic trick') %___________________________________________________________________________ \hypertarget{implementing-a-ternary-operator}{} \pdfbookmark[1]{Implementing a ternary operator}{implementing-a-ternary-operator} \subsection*{Implementing a ternary operator} \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{{\#}~module~ternary.py}\\ \mbox{}\\ \mbox{"PEP~308~and~312~implemented~via~a~metaclass-powered~dirty~trick"}\\ \mbox{}\\ \mbox{import~inspect,{\_}{\_}main{\_}{\_}}\\ \mbox{}\\ \mbox{{\#}~the~ternary~operator:}\\ \mbox{}\\ \mbox{def~if{\_}(cond,f,g):}\\ \mbox{~~~~"Short~circuiting~ternary~operator~implemented~via~lambdas"}\\ \mbox{~~~~if~cond:~return~f()}\\ \mbox{~~~~else:~return~g()}\\ \mbox{}\\ \mbox{{\#}~the~metaclass~black~magic:}\\ \mbox{}\\ \mbox{class~DirtyTrick(type):}\\ \mbox{~~~~"""Cooperative~metaclass~that~looks~at~the~source~code~of~its~instances~}\\ \mbox{~~~~and~replaces~the~string~'{\textasciitilde}'~with~'lambda~:'~before~the~class~creation"""}\\ \mbox{~~~~def~{\_}{\_}new{\_}{\_}(meta,name,bases,dic):}\\ \mbox{~~~~~~~~for~attr~in~dic.values():}\\ \mbox{~~~~~~~~~~~~if~inspect.isfunction(attr):~}\\ \mbox{~~~~~~~~~~~~~~~~code=inspect.getsource(attr)}\\ \mbox{~~~~~~~~~~~~~~~~if~code.find('{\textasciitilde}')==-1:~continue~{\#}~no~'{\textasciitilde}'~found,~skip}\\ \mbox{~~~~~~~~~~~~~~~~code=code.replace('{\textasciitilde}','lambda~:')}\\ \mbox{~~~~~~~~~~~~~~~~code=dedent(code)+'{\textbackslash}n'}\\ \mbox{~~~~~~~~~~~~~~~~exec~code~in~{\_}{\_}main{\_}{\_}.{\_}{\_}dict{\_}{\_},dic~{\#}~modifies~dic}\\ \mbox{~~~~~~~~return~super(DirtyTrick,meta).{\_}{\_}new{\_}{\_}(meta,name,bases,dic)}\\ \mbox{}\\ \mbox{{\#}~a~convenient~base~class:}\\ \mbox{}\\ \mbox{class~RecognizesImplicitLambdas:}\\ 
\mbox{~~~~"Children~of~this~class~do~recognize~implicit~lambdas"}\\ \mbox{~~~~{\_}{\_}metaclass{\_}{\_}=DirtyTrick} \end{flushleft}\end{ttfamily} \end{quote} Here there is an example of usage: \begin{quote} \begin{ttfamily}\begin{flushleft} \mbox{from~ternary~import~if{\_},~RecognizesImplicitLambdas}\\ \mbox{from~math~import~sqrt}\\ \mbox{}\\ \mbox{class~C(RecognizesImplicitLambdas):}\\ \mbox{~~~def~safesqrt(self,x):}\\ \mbox{~~~~~~~~return~if{\_}(~x>0,~{\textasciitilde}sqrt(x),~{\textasciitilde}0)~{\#}short-circuiting~ternary~operator}\\ \mbox{}\\ \mbox{c=C()}\\ \mbox{print~c.safesqrt(4),~c.safesqrt(-4)~} \end{flushleft}\end{ttfamily} \end{quote} \end{document}