diff options
83 files changed, 13903 insertions, 0 deletions
diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 000000000..5531da43d --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,98 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = LANG=C sphinx-build +PAPER = + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d build/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source + +.PHONY: help clean html web pickle htmlhelp latex changes linkcheck + +help: + @echo "Please use \`make <target>' where <target> is one of" + @echo " dist to make a distribution-ready tree" + @echo " html to make standalone HTML files" + @echo " pickle to make pickle files (usable by e.g. sphinx-web)" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " changes to make an overview over all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + +clean: + -rm -rf build/* source/reference/generated + +dist: html + test -d build/latex || make latex + make -C build/latex all-pdf + -rm -rf build/dist + cp -r build/html build/dist + perl -pi -e 's#^\s*(<li><a href=".*?">NumPy.*?Manual.*?»</li>)#<li><a href="/">Numpy and Scipy Documentation</a> »</li>#;' build/dist/*.html build/dist/*/*.html build/dist/*/*/*.html + cd build/html && zip -9r ../dist/numpy-html.zip . 
+ cp build/latex/*.pdf build/dist + cd build/dist && tar czf ../dist.tar.gz * + +generate: build/generate-stamp +build/generate-stamp: $(wildcard source/reference/*.rst) ext + mkdir -p build + ./ext/autosummary_generate.py source/reference/*.rst \ + -p dump.xml -o source/reference/generated + touch build/generate-stamp + +ext: + svn co http://sphinx.googlecode.com/svn/contrib/trunk/numpyext ext + +html: generate + mkdir -p build/html build/doctrees + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) build/html + python postprocess.py html build/html/*.html + @echo + @echo "Build finished. The HTML pages are in build/html." + +pickle: generate + mkdir -p build/pickle build/doctrees + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) build/pickle + @echo + @echo "Build finished; now you can process the pickle files or run" + @echo " sphinx-web build/pickle" + @echo "to start the sphinx-web server." + +web: pickle + +htmlhelp: generate + mkdir -p build/htmlhelp build/doctrees + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) build/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in build/htmlhelp." + +latex: generate + mkdir -p build/latex build/doctrees + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) build/latex + python postprocess.py tex build/latex/*.tex + @echo + @echo "Build finished; the LaTeX files are in build/latex." + @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ + "run these through (pdf)latex." + +coverage: build + mkdir -p build/coverage build/doctrees + $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) build/coverage + @echo "Coverage finished; see c.txt and python.txt in build/coverage" + +changes: generate + mkdir -p build/changes build/doctrees + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) build/changes + @echo + @echo "The overview file is in build/changes." 
+ +linkcheck: generate + mkdir -p build/linkcheck build/doctrees + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) build/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in build/linkcheck/output.txt." diff --git a/doc/README.txt b/doc/README.txt new file mode 100644 index 000000000..2a7ad82ee --- /dev/null +++ b/doc/README.txt @@ -0,0 +1,40 @@ +NumPy Reference Guide +===================== + +Instructions +------------ +1. Optionally download an XML dump of the newest docstrings from the doc wiki + at ``/pydocweb/dump`` and save it as ``dump.xml``. +2. Run ``make html`` or ``make dist`` + +You can also run ``summarize.py`` to see which parts of the Numpy +namespace are documented. + + +TODO +---- + +* Numberless [*] footnotes cause LaTeX errors. + +* ``See also`` sections are still somehow broken even if some work. + The problem is that Sphinx searches like this:: + + 'name' + 'active_module.name' + 'active_module.active_class.name'. + + Whereas, we would like to have this: + + 'name' + 'active_module.name' + 'parent_of_active_module.name' + 'parent_of_parent_of_active_module.name' + ... + 'numpy.name' + + We can get one step upwards by always using 'numpy' as the active module. + It seems difficult to beat Sphinx to do what we want. + Do we need to change our docstring standard slightly, ie. allow only + leaving the 'numpy.' prefix away? + +* Link resolution doesn't work as intended... eg. `doc.ufunc`_ diff --git a/doc/postprocess.py b/doc/postprocess.py new file mode 100755 index 000000000..1c6ef1b2e --- /dev/null +++ b/doc/postprocess.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +""" +%prog MODE FILES... + +Post-processes HTML and Latex files output by Sphinx. +MODE is either 'html' or 'tex'. 
+ +""" +import re, optparse + +def main(): + p = optparse.OptionParser(__doc__) + options, args = p.parse_args() + + if len(args) < 1: + p.error('no mode given') + + mode = args.pop(0) + + if mode not in ('html', 'tex'): + p.error('unknown mode %s' % mode) + + for fn in args: + f = open(fn, 'r') + try: + if mode == 'html': + lines = process_html(fn, f.readlines()) + elif mode == 'tex': + lines = process_tex(f.readlines()) + finally: + f.close() + + f = open(fn, 'w') + f.write("".join(lines)) + f.close() + +def process_html(fn, lines): + return lines + +def process_tex(lines): + """ + Remove unnecessary section titles from the LaTeX file. + + """ + new_lines = [] + for line in lines: + if (line.startswith(r'\section{numpy.') + or line.startswith(r'\subsection{numpy.') + or line.startswith(r'\subsubsection{numpy.') + or line.startswith(r'\paragraph{numpy.') + or line.startswith(r'\subparagraph{numpy.') + ): + pass # skip! + else: + new_lines.append(line) + return new_lines + +if __name__ == "__main__": + main() diff --git a/doc/source/_static/scipy.css b/doc/source/_static/scipy.css new file mode 100644 index 000000000..bfb42a567 --- /dev/null +++ b/doc/source/_static/scipy.css @@ -0,0 +1,155 @@ +@import "default.css"; + +/** + * Spacing fixes + */ + +div.body p, div.body dd, div.body li { + line-height: 125%; +} + +ul.simple { + margin-top: 0; + margin-bottom: 0; + padding-top: 0; + padding-bottom: 0; +} + +/* spacing around blockquoted fields in parameters/attributes/returns */ +td.field-body > blockquote { + margin-top: 0.1em; + margin-bottom: 0.5em; +} + +/* spacing around example code */ +div.highlight > pre { + padding: 2px 5px 2px 5px; +} + +/* spacing in see also definition lists */ +dl.last > dd { + margin-top: 1px; + margin-bottom: 5px; + margin-left: 30px; +} + +/** + * Hide dummy toctrees + */ + +ul { + padding-top: 0; + padding-bottom: 0; + margin-top: 0; + margin-bottom: 0; +} +ul li { + padding-top: 0; + padding-bottom: 0; + margin-top: 0; + 
margin-bottom: 0; +} +ul li a.reference { + padding-top: 0; + padding-bottom: 0; + margin-top: 0; + margin-bottom: 0; +} + +/** + * Make high-level subsections easier to distinguish from top-level ones + */ +div.body h3 { + background-color: transparent; +} + +div.body h4 { + border: none; + background-color: transparent; +} + +/** + * Scipy colors + */ + +body { + background-color: rgb(100,135,220); +} + +div.document { + background-color: rgb(230,230,230); +} + +div.sphinxsidebar { + background-color: rgb(230,230,230); +} + +div.related { + background-color: rgb(100,135,220); +} + +div.sphinxsidebar h3 { + color: rgb(0,102,204); +} + +div.sphinxsidebar h3 a { + color: rgb(0,102,204); +} + +div.sphinxsidebar h4 { + color: rgb(0,82,194); +} + +div.sphinxsidebar p { + color: black; +} + +div.sphinxsidebar a { + color: #355f7c; +} + +div.sphinxsidebar ul.want-points { + list-style: disc; +} + +.field-list th { + color: rgb(0,102,204); +} + +/** + * Extra admonitions + */ + +div.tip { + background-color: #ffffe4; + border: 1px solid #ee6; +} + +/* +div.admonition-example { + background-color: #e4ffe4; + border: 1px solid #ccc; +}*/ + + +/** + * Styling for field lists + */ + +table.field-list th { + border-left: 1px solid #aaa !important; + padding-left: 5px; +} + +table.field-list { + border-collapse: separate; + border-spacing: 10px; +} + +/** + * Styling for footnotes + */ + +table.footnote td, table.footnote th { + border: none; +} diff --git a/doc/source/_templates/indexcontent.html b/doc/source/_templates/indexcontent.html new file mode 100644 index 000000000..523fa8fe3 --- /dev/null +++ b/doc/source/_templates/indexcontent.html @@ -0,0 +1,55 @@ +{% extends "defindex.html" %} +{% block tables %} + <p><strong>Parts of the documentation:</strong></p> + <table class="contentstable" align="center"><tr> + <td width="50%"> + <p class="biglink"><a class="biglink" href="{{ pathto("user/index") }}">Numpy User Guide</a><br/> + <span class="linkdescr">start here</span></p> 
+ <p class="biglink"><a class="biglink" href="{{ pathto("reference/index") }}">Numpy Reference</a><br/> + <span class="linkdescr">reference documentation</span></p> + </td></tr> + </table> + + <p><strong>Indices and tables:</strong></p> + <table class="contentstable" align="center"><tr> + <td width="50%"> + <p class="biglink"><a class="biglink" href="{{ pathto("modindex") }}">Module Index</a><br/> + <span class="linkdescr">quick access to all modules</span></p> + <p class="biglink"><a class="biglink" href="{{ pathto("genindex") }}">General Index</a><br/> + <span class="linkdescr">all functions, classes, terms</span></p> + <p class="biglink"><a class="biglink" href="{{ pathto("glossary") }}">Glossary</a><br/> + <span class="linkdescr">the most important terms explained</span></p> + </td><td width="50%"> + <p class="biglink"><a class="biglink" href="{{ pathto("search") }}">Search page</a><br/> + <span class="linkdescr">search this documentation</span></p> + <p class="biglink"><a class="biglink" href="{{ pathto("contents") }}">Complete Table of Contents</a><br/> + <span class="linkdescr">lists all sections and subsections</span></p> + </td></tr> + </table> + + <p><strong>Meta information:</strong></p> + <table class="contentstable" align="center"><tr> + <td width="50%"> + <p class="biglink"><a class="biglink" href="{{ pathto("bugs") }}">Reporting bugs</a></p> + <p class="biglink"><a class="biglink" href="{{ pathto("about") }}">About NumPy</a></p> + </td><td width="50%"> + <p class="biglink"><a class="biglink" href="{{ pathto("license") }}">License of Numpy</a></p> + </td></tr> + </table> + + <h2>Acknowledgements</h2> + <p> + Large parts of this manual originate from Travis E. Oliphant's book + <a href="http://www.tramy.us/">"Guide to Numpy"</a> (which generously entered + Public Domain in August 2008). 
The reference documentation for many of + the functions is written by numerous contributors and developers of + Numpy, both prior to and during the + <a href="http://scipy.org/Developer_Zone/DocMarathon2008">Numpy Documentation Marathon</a>. + </p> + <p> + The Documentation Marathon is still ongoing. Please help us write + better documentation for Numpy by joining it! Instructions on how to + join and what to do can be found + <a href="http://scipy.org/Developer_Zone/DocMarathon2008">on the scipy.org website</a>. + </p> +{% endblock %} diff --git a/doc/source/_templates/indexsidebar.html b/doc/source/_templates/indexsidebar.html new file mode 100644 index 000000000..409743a03 --- /dev/null +++ b/doc/source/_templates/indexsidebar.html @@ -0,0 +1,5 @@ + <h3>Resources</h3> + <ul> + <li><a href="http://scipy.org/">Scipy.org website</a></li> + <li> </li> + </ul> diff --git a/doc/source/_templates/layout.html b/doc/source/_templates/layout.html new file mode 100644 index 000000000..5338fc6f6 --- /dev/null +++ b/doc/source/_templates/layout.html @@ -0,0 +1,4 @@ +{% extends "!layout.html" %} +{% block rootrellink %} +<li><a href="{{ pathto('index') }}">{{ shorttitle }}</a>{{ reldelim1 }}</li> +{% endblock %} diff --git a/doc/source/about.rst b/doc/source/about.rst new file mode 100644 index 000000000..6bbb2d186 --- /dev/null +++ b/doc/source/about.rst @@ -0,0 +1,66 @@ +About NumPy +=========== + +`NumPy <http://www.scipy.org/NumPy/>`__ is the fundamental package +needed for scientific computing with Python. 
This package contains: + +- a powerful N-dimensional :ref:`array object <arrays>` +- sophisticated :ref:`(broadcasting) functions <ufuncs>` +- basic :ref:`linear algebra functions <routines.linalg>` +- basic :ref:`Fourier transforms <routines.fft>` +- sophisticated :ref:`random number capabilities <routines.random>` +- tools for integrating Fortran code +- tools for integrating C/C++ code + +Besides its obvious scientific uses, *NumPy* can also be used as an +efficient multi-dimensional container of generic data. Arbitrary +data-types can be defined. This allows *NumPy* to seamlessly and +speedily integrate with a wide-variety of databases. + +NumPy is a successor for two earlier scientific Python libraries: +NumPy derives from the old *Numeric* code base and can be used +as a replacement for *Numeric*. It also adds the features introduced +by *Numarray* and can also be used to replace *Numarray*. + +NumPy community +--------------- + +Numpy is a distributed, volunteer open-source project. *You* can help +us make it better; if you believe something should be improved either +in functionality or in documentation, don't hesitate to contact us --- or +even better, contact us and participate in fixing the problem. + +Our main means of communication are: + +- `scipy.org website <http://scipy.org/>`__ + +- `Mailing lists <http://scipy.org/Mailing_Lists>`__ + +- `Numpy Trac <http://projects.scipy.org/scipy/numpy>`__ (bug "tickets" go here) + +More information about the development of Numpy can be found at +http://scipy.org/Developer_Zone + +If you want to fix issues in this documentation, the easiest way +is to participate in `our ongoing documentation marathon +<http://scipy.org/Developer_Zone/DocMarathon2008>`__. + + +About this documentation +======================== + +Conventions +----------- + +Names of classes, objects, constants, etc. are given in **boldface** font. +Often they are also links to a more detailed documentation of the +referred object. 
+ +This manual contains many examples of use, usually prefixed with the +Python prompt ``>>>`` (which is not a part of the example code). The +examples assume that you have first entered:: + +>>> import numpy as np + +before running the examples. + diff --git a/doc/source/bugs.rst b/doc/source/bugs.rst new file mode 100644 index 000000000..cd2c5d3e8 --- /dev/null +++ b/doc/source/bugs.rst @@ -0,0 +1,23 @@ +************** +Reporting bugs +************** + +File bug reports or feature requests, and make contributions +(e.g. code patches), by submitting a "ticket" on the Trac pages: + +- Numpy Trac: http://scipy.org/scipy/numpy + +Because of spam abuse, you must create an account on our Trac in order +to submit a ticket, then click on the "New Ticket" tab that only +appears when you have logged in. Please give as much information as +you can in the ticket. It is extremely useful if you can supply a +small self-contained code snippet that reproduces the problem. Also +specify the component, the version you are referring to and the +milestone. + +Report bugs to the appropriate Trac instance (there is one for NumPy +and a different one for SciPy). There are also read-only mailing lists +for tracking the status of your bug ticket. + +More information can be found on the http://scipy.org/Developer_Zone +website. diff --git a/doc/source/conf.py b/doc/source/conf.py new file mode 100644 index 000000000..153176878 --- /dev/null +++ b/doc/source/conf.py @@ -0,0 +1,229 @@ +# -*- coding: utf-8 -*- + +import sys, os + +# If your extensions are in another directory, add it here. If the directory +# is relative to the documentation root, use os.path.abspath to make it +# absolute, like shown here. 
+sys.path.append(os.path.abspath('../ext')) + +# Check Sphinx version +import sphinx +if sphinx.__version__ < "0.5": + raise RuntimeError("Sphinx 0.5.dev or newer required") + + +# ----------------------------------------------------------------------------- +# General configuration +# ----------------------------------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.pngmath', 'numpydoc', + 'phantom_import', 'autosummary', 'sphinx.ext.intersphinx', + 'sphinx.ext.coverage'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The master toctree document. +#master_doc = 'index' + +# General substitutions. +project = 'NumPy' +copyright = '2008, The Scipy community' + +# The default replacements for |version| and |release|, also used in various +# other places throughout the built documents. +# +# The short X.Y version. +version = '1.2' +# The full version, including alpha/beta/rc tags. +release = '1.2.dev' + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +today_fmt = '%B %d, %Y' + +# List of documents that shouldn't be included in the build. +#unused_docs = [] + +# The reST default role (used for this markup: `text`) to use for all documents. +default_role = "autolink" + +# List of directories, relative to source directories, that shouldn't be searched +# for source files. +exclude_dirs = [] + +# If true, '()' will be appended to :func: etc. cross-reference text. +add_function_parentheses = False + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). 
+#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + + +# ----------------------------------------------------------------------------- +# HTML output +# ----------------------------------------------------------------------------- + +# The style sheet to use for HTML and HTML Help pages. A file of that name +# must exist either in Sphinx' static/ path, or in one of the custom paths +# given in html_static_path. +html_style = 'scipy.css' + +# The name for this set of Sphinx documents. If None, it defaults to +# "<project> v<release> documentation". +html_title = "%s v%s Manual (DRAFT)" % (project, version) + +# The name of an image file (within the static path) to place at the top of +# the sidebar. +html_logo = 'scipyshiny_small.png' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +html_sidebars = { + 'index': 'indexsidebar.html' +} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +html_additional_pages = { + 'index': 'indexcontent.html', +} + +# If false, no module index is generated. +html_use_modindex = True + +# If true, the reST sources are included in the HTML build as _sources/<name>. 
+#html_copy_source = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# If nonempty, this is the file name suffix for HTML files (e.g. ".html"). +#html_file_suffix = '.html' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'NumPydoc' + +# Pngmath should try to align formulas properly +pngmath_use_preview = True + + +# ----------------------------------------------------------------------------- +# LaTeX output +# ----------------------------------------------------------------------------- + +# The paper size ('letter' or 'a4'). +#latex_paper_size = 'letter' + +# The font size ('10pt', '11pt' or '12pt'). +#latex_font_size = '10pt' + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, document class [howto/manual]). +_stdauthor = 'Written by the NumPy community' +latex_documents = [ + ('reference/index', 'numpy-ref.tex', 'NumPy Reference', + _stdauthor, 'manual'), + ('user/index', 'numpy-user.tex', 'NumPy User Guide', + _stdauthor, 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# Additional stuff for the LaTeX preamble. 
+latex_preamble = r''' +\usepackage{amsmath} + +% In the parameters section, place a newline after the Parameters +% header +\usepackage{expdlist} +\let\latexdescription=\description +\def\description{\latexdescription{}{} \breaklabel} + +% Make Examples/etc section headers smaller and more compact +\makeatletter +\titleformat{\paragraph}{\normalsize\py@HeaderFamily}% + {\py@TitleColor}{0em}{\py@TitleColor}{\py@NormalColor} +\titlespacing*{\paragraph}{0pt}{1ex}{0pt} +\makeatother + +% Fix footer/header +\renewcommand{\chaptermark}[1]{\markboth{\MakeUppercase{\thechapter.\ #1}}{}} +\renewcommand{\sectionmark}[1]{\markright{\MakeUppercase{\thesection.\ #1}}} +''' + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +latex_use_modindex = False + + +# ----------------------------------------------------------------------------- +# Intersphinx configuration +# ----------------------------------------------------------------------------- +intersphinx_mapping = {'http://docs.python.org/dev': None} + + +# ----------------------------------------------------------------------------- +# Numpy extensions +# ----------------------------------------------------------------------------- + +# If we want to do a phantom import from an XML file for all autodocs +phantom_import_file = 'dump.xml' + +# Edit links +#numpydoc_edit_link = '`Edit </pydocweb/doc/%(full_name)s/>`__' + +# ----------------------------------------------------------------------------- +# Coverage checker +# ----------------------------------------------------------------------------- +coverage_ignore_modules = r""" + """.split() +coverage_ignore_functions = r""" + test($|_) (some|all)true bitwise_not cumproduct pkgload + generic\. 
+ """.split() +coverage_ignore_classes = r""" + """.split() + +coverage_c_path = [] +coverage_c_regexes = {} +coverage_ignore_c_items = {} + + diff --git a/doc/source/contents.rst b/doc/source/contents.rst new file mode 100644 index 000000000..70f1709b7 --- /dev/null +++ b/doc/source/contents.rst @@ -0,0 +1,12 @@ +##################### +Numpy manual contents +##################### + +.. toctree:: + + user/index + reference/index + about + bugs + license + glossary diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst new file mode 100644 index 000000000..ffa8f7368 --- /dev/null +++ b/doc/source/glossary.rst @@ -0,0 +1,14 @@ +******** +Glossary +******** + +.. toctree:: + +.. glossary:: + + .. automodule:: numpy.doc.glossary + +Jargon +------ + +.. automodule:: numpy.doc.jargon diff --git a/doc/source/license.rst b/doc/source/license.rst new file mode 100644 index 000000000..351a5ad15 --- /dev/null +++ b/doc/source/license.rst @@ -0,0 +1,35 @@ +************* +Numpy License +************* + +Copyright (c) 2005, NumPy Developers + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + +* Neither the name of the NumPy Developers nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/doc/source/reference/arrays.classes.rst b/doc/source/reference/arrays.classes.rst new file mode 100644 index 000000000..65fc10af5 --- /dev/null +++ b/doc/source/reference/arrays.classes.rst @@ -0,0 +1,414 @@ +######################### +Standard array subclasses +######################### + +.. currentmodule:: numpy + +The :class:`ndarray` in NumPy is a "new-style" Python +built-in-type. Therefore, it can be inherited from (in Python or in C) +if desired. Therefore, it can form a foundation for many useful +classes. Often whether to sub-class the array object or to simply use +the core array component as an internal part of a new class is a +difficult decision, and can be simply a matter of choice. NumPy has +several tools for simplifying how your new object interacts with other +array objects, and so the choice may not be significant in the +end. One way to simplify the question is by asking yourself if the +object you are interested can be replaced as a single array or does it +really require two or more arrays at its core. + +Note that :func:`asarray` always returns the base-class ndarray. 
If +you are confident that your use of the array object can handle any +subclass of an ndarray, then :func:`asanyarray` can be used to allow +subclasses to propagate more cleanly through your subroutine. In +principle a subclass could redefine any aspect of the array and +therefore, under strict guidelines, :func:`asanyarray` would rarely be +useful. However, most subclasses of the array object will not +redefine certain aspects of the array object such as the buffer +interface, or the attributes of the array. One important example, +however, of why your subroutine may not be able to handle an arbitrary +subclass of an array is that matrices redefine the "*" operator to be +matrix-multiplication, rather than element-by-element multiplication. + + +Special attributes and methods +============================== + +.. seealso:: :ref:`Subclassing ndarray <basics.subclassing>` + +Numpy provides several hooks that subclasses of :class:`ndarray` can +customize: + +.. function:: __array_finalize__(self) + + This method is called whenever the system internally allocates a + new array from *obj*, where *obj* is a subclass (subtype) of the + :class:`ndarray`. It can be used to change attributes of *self* after + construction (so as to ensure a 2-d matrix for example), or to + update meta-information from the "parent." Subclasses inherit a + default implementation of this method that does nothing. + +.. function:: __array_wrap__(array) + + This method should return an instance of the subclass from the + :class:`ndarray` object passed in. For example, this is called + after every :ref:`ufunc <ufuncs.output-type>` for the object with + the highest array priority. The ufunc-computed array object is + passed in and whatever is returned is passed to the + user. Subclasses inherit a default implementation of this method. + +.. 
data:: __array_priority__ + + The value of this attribute is used to determine what type of + object to return in situations where there is more than one + possibility for the Python type of the returned object. Subclasses + inherit a default value of 1.0 for this attribute. + +.. function:: __array__([dtype]) + + If a class having the :obj:`__array__` method is used as the output + object of an :ref:`ufunc <ufuncs.output-type>`, results will be + written to the object returned by :obj:`__array__`. + +Matrix objects +============== + +.. index:: + single: matrix + +:class:`matrix` objects inherit from the ndarray and therefore, they +have the same attributes and methods of ndarrays. There are six +important differences of matrix objects, however that may lead to +unexpected results when you use matrices but expect them to act like +arrays: + +1. Matrix objects can be created using a string notation to allow Matlab- + style syntax where spaces separate columns and semicolons (';') + separate rows. + +2. Matrix objects are always two-dimensional. This has far-reaching + implications, in that m.ravel() is still two-dimensional (with a 1 in + the first dimension) and item selection returns two-dimensional + objects so that sequence behavior is fundamentally different than + arrays. + +3. Matrix objects over-ride multiplication to be + matrix-multiplication. **Make sure you understand this for + functions that you may want to receive matrices. Especially in + light of the fact that asanyarray(m) returns a matrix when m is a + matrix.** + +4. Matrix objects over-ride power to be matrix raised to a power. The + same warning about using power inside a function that uses + asanyarray(...) to get an array object holds for this fact. + +5. The default __array_priority\__ of matrix objects is 10.0, and + therefore mixed operations with ndarrays always produce matrices. + +6. Matrices have special attributes which make calculations easier. These + are + + .. 
autosummary:: + :toctree: generated/ + + matrix.T + matrix.H + matrix.I + matrix.A + +.. warning:: + + Matrix objects over-ride multiplication, '*', and power, '**', to be + matrix-multiplication and matrix power, respectively. If your + subroutine can accept sub-classes and you do not convert to base-class + arrays, then you must use the ufuncs multiply and power to be sure + that you are performing the correct operation for all inputs. + +The matrix class is a Python subclass of the ndarray and can be used +as a reference for how to construct your own subclass of the ndarray. +Matrices can be created from other matrices, strings, and anything +else that can be converted to an ``ndarray`` . The name "mat "is an +alias for "matrix "in NumPy. + +.. autosummary:: + :toctree: generated/ + + matrix + asmatrix + bmat + +Example 1: Matrix creation from a string + +>>> a=mat('1 2 3; 4 5 3') +>>> print (a*a.T).I +[[ 0.2924 -0.1345] + [-0.1345 0.0819]] + +Example 2: Matrix creation from nested sequence + +>>> mat([[1,5,10],[1.0,3,4j]]) +matrix([[ 1.+0.j, 5.+0.j, 10.+0.j], + [ 1.+0.j, 3.+0.j, 0.+4.j]]) + +Example 3: Matrix creation from an array + +>>> mat(random.rand(3,3)).T +matrix([[ 0.7699, 0.7922, 0.3294], + [ 0.2792, 0.0101, 0.9219], + [ 0.3398, 0.7571, 0.8197]]) + +Memory-mapped file arrays +========================= + +.. index:: + single: memory maps + +.. currentmodule:: numpy + +Memory-mapped files are useful for reading and/or modifying small +segments of a large file with regular layout, without reading the +entire file into memory. A simple subclass of the ndarray uses a +memory-mapped file for the data buffer of the array. For small files, +the over-head of reading the entire file into memory is typically not +significant, however for large files using memory mapping can save +considerable resources. 
+
+Memory-mapped-file arrays have one additional method (besides those
+they inherit from the ndarray): :meth:`.flush() <memmap.flush>` which
+must be called manually by the user to ensure that any changes to the
+array actually get written to disk.
+
+.. note::
+
+   Memory-mapped arrays use the Python memory-map object which (prior
+   to Python 2.5) does not allow files to be larger than a certain size
+   depending on the platform. This size is always < 2GB even on 64-bit
+   systems.
+
+.. autosummary::
+   :toctree: generated/
+
+   memmap
+   memmap.flush
+
+Example:
+
+>>> a = memmap('newfile.dat', dtype=float, mode='w+', shape=1000)
+>>> a[10] = 10.0
+>>> a[30] = 30.0
+>>> del a
+>>> b = fromfile('newfile.dat', dtype=float)
+>>> print b[10], b[30]
+10.0 30.0
+>>> a = memmap('newfile.dat', dtype=float)
+>>> print a[10], a[30]
+10.0 30.0
+
+
+Character arrays (:mod:`numpy.char`)
+====================================
+
+.. seealso:: :ref:`routines.array-creation.char`
+
+.. index::
+   single: character arrays
+
+These are enhanced arrays of either :class:`string` type or
+:class:`unicode_` type. These arrays inherit from the
+:class:`ndarray`, but specially-define the operations ``+``, ``*``,
+and ``%`` on a (broadcasting) element-by-element basis. These
+operations are not available on the standard :class:`ndarray` of
+character type. In addition, the :class:`chararray` has all of the
+standard :class:`string <str>` (and :class:`unicode`) methods,
+executing them on an element-by-element basis. Perhaps the easiest way
+to create a chararray is to use :meth:`self.view(chararray)
+<ndarray.view>` where *self* is an ndarray of string or unicode
+data-type. However, a chararray can also be created using the
+:meth:`numpy.chararray` constructor, or via the
+:func:`numpy.char.array` function:
+
+..
autosummary:: + :toctree: generated/ + + chararray + core.defchararray.array + +Another difference with the standard ndarray of string data-type is +that the chararray inherits the feature introduced by Numarray that +white-space at the end of any element in the array will be ignored on +item retrieval and comparison operations. + + +.. _arrays.classes.rec: + +Record arrays (:mod:`numpy.rec`) +================================ + +.. seealso:: :ref:`routines.array-creation.rec`, :ref:`routines.dtype`, + :ref:`arrays.dtypes`. + +Numpy provides the :class:`recarray` class which allows accessing the +fields of a record/structured array as attributes, and a corresponding +scalar data type object :class:`record`. + +.. currentmodule:: numpy + +.. autosummary:: + :toctree: generated/ + + recarray + record + +Masked arrays (:mod:`numpy.ma`) +=============================== + +.. seealso:: :ref:`routines.ma` + +.. XXX: masked array documentation should be improved + +.. currentmodule:: numpy + +.. index:: + single: masked arrays + +.. autosummary:: + :toctree: generated/ + + ma.masked_array + +.. automodule:: numpy.ma + + +Standard container class +======================== + +.. currentmodule:: numpy + +For backward compatibility and as a standard "container "class, the +UserArray from Numeric has been brought over to NumPy and named +:class:`numpy.lib.user_array.container` The container class is a +Python class whose self.array attribute is an ndarray. Multiple +inheritance is probably easier with numpy.lib.user_array.container +than with the ndarray itself and so it is included by default. It is +not documented here beyond mentioning its existence because you are +encouraged to use the ndarray class directly if you can. + +.. autosummary:: + :toctree: generated/ + + numpy.lib.user_array.container + +.. index:: + single: user_array + single: container class + + +Array Iterators +=============== + +.. currentmodule:: numpy + +.. 
index:: + single: array iterator + +Iterators are a powerful concept for array processing. Essentially, +iterators implement a generalized for-loop. If *myiter* is an iterator +object, then the Python code:: + + for val in myiter: + ... + some code involving val + ... + +calls ``val = myiter.next()`` repeatedly until :exc:`StopIteration` is +raised by the iterator. There are several ways to iterate over an +array that may be useful: default iteration, flat iteration, and +:math:`N`-dimensional enumeration. + + +Default iteration +----------------- + +The default iterator of an ndarray object is the default Python +iterator of a sequence type. Thus, when the array object itself is +used as an iterator. The default behavior is equivalent to:: + + for i in arr.shape[0]: + val = arr[i] + +This default iterator selects a sub-array of dimension :math:`N-1` from the array. This can be a useful construct for defining recursive +algorithms. To loop over the entire array requires :math:`N` for-loops. + +>>> a = arange(24).reshape(3,2,4)+10 +>>> for val in a: +... print 'item:', val +item: [[10 11 12 13] + [14 15 16 17]] +item: [[18 19 20 21] + [22 23 24 25]] +item: [[26 27 28 29] + [30 31 32 33]] + + +Flat iteration +-------------- + +.. autosummary:: + :toctree: generated/ + + ndarray.flat + +As mentioned previously, the flat attribute of ndarray objects returns +an iterator that will cycle over the entire array in C-style +contiguous order. + +>>> for i, val in enumerate(a.flat): +... if i%5 == 0: print i, val +0 10 +5 15 +10 20 +15 25 +20 30 + +Here, I've used the built-in enumerate iterator to return the iterator +index as well as the value. + + +N-dimensional enumeration +------------------------- + +.. autosummary:: + :toctree: generated/ + + ndenumerate + +Sometimes it may be useful to get the N-dimensional index while +iterating. The ndenumerate iterator can achieve this. + +>>> for i, val in ndenumerate(a): +... 
if sum(i)%5 == 0: print i, val +(0, 0, 0) 10 +(1, 1, 3) 25 +(2, 0, 3) 29 +(2, 1, 2) 32 + + +Iterator for broadcasting +------------------------- + +.. autosummary:: + :toctree: generated/ + + broadcast + +The general concept of broadcasting is also available from Python +using the :class:`broadcast` iterator. This object takes :math:`N` +objects as inputs and returns an iterator that returns tuples +providing each of the input sequence elements in the broadcasted +result. + +>>> for val in broadcast([[1,0],[2,3]],[0,1]): +... print val +(1, 0) +(0, 1) +(2, 0) +(3, 1) diff --git a/doc/source/reference/arrays.dtypes.rst b/doc/source/reference/arrays.dtypes.rst new file mode 100644 index 000000000..6b0d2cea3 --- /dev/null +++ b/doc/source/reference/arrays.dtypes.rst @@ -0,0 +1,513 @@ +.. currentmodule:: numpy + +.. _arrays.dtypes: + +********************************** +Data type objects (:class:`dtype`) +********************************** + +A data type object (an instance of :class:`numpy.dtype` class) +describes how the bytes in the fixed-size block of memory +corresponding to an array item should be interpreted. It describes the +following aspects of the data: + +1. Type of the data (integer, float, Python object, etc.) +2. Size of the data (how many bytes is in *e.g.* the integer) +3. Byte order of the data (:term:`little-endian` or :term:`big-endian`) +4. If the data type is a :term:`record`, an aggregate of other + data types, (*e.g.*, describing an array item consisting of + an integer and a float), + + 1. what are the names of the ":term:`fields <field>`" of the record, + by which they can be :ref:`accessed <arrays.indexing.rec>`, + 2. what is the data-type of each :term:`field`, and + 3. which part of the memory block each field takes. + +5. If the data is a sub-array, what is its shape and data type. + +.. 
index:: + pair: dtype; scalar + +To describe the type of scalar data, there are several :ref:`built-in +scalar types <arrays.scalars.built-in>` in Numpy for various precision +of integers, floating-point numbers, *etc*. An item extracted from an +array, *e.g.*, by indexing, will be a Python object whose type is the +scalar type associated with the data type of the array. + +Note that the scalar types are not :class:`dtype` objects, even though +they can be used in place of one whenever a data type specification is +needed in Numpy. + +.. index:: + pair: dtype; field + pair: dtype; record + +Record data types are formed by creating a data type whose +:term:`fields` contain other data types. Each field has a name by +which it can be :ref:`accessed <arrays.indexing.rec>`. The parent data +type should be of sufficient size to contain all its fields; the +parent can for example be based on the :class:`void` type which allows +an arbitrary item size. Record data types may also contain other record +types and fixed-size sub-array data types in their fields. + +.. index:: + pair: dtype; sub-array + +Finally, a data type can describe items that are themselves arrays of +items of another data type. These sub-arrays must, however, be of a +fixed size. If an array is created using a data-type describing a +sub-array, the dimensions of the sub-array are appended to the shape +of the array when the array is created. Sub-arrays in a field of a +record behave differently, see :ref:`arrays.indexing.rec`. + +.. admonition:: Example + + A simple data type containing a 32-bit big-endian integer: + (see :ref:`arrays.dtypes.constructing` for details on construction) + + >>> dt = np.dtype('>i4') + >>> dt.byteorder + '>' + >>> dt.itemsize + 4 + >>> dt.name + 'int32' + >>> dt.type is np.int32 + True + + The corresponding array scalar type is :class:`int32`. + +.. 
admonition:: Example + + A record data type containing a 16-character string (in field 'name') + and a sub-array of two 64-bit floating-point number (in field 'grades'): + + >>> dt = np.dtype([('name', np.str_, 16), ('grades', np.float64, (2,))]) + >>> dt['name'] + dtype('|S16') + >>> dt['grades'] + dtype(('float64',(2,))) + + Items of an array of this data type are wrapped in an :ref:`array + scalar <arrays.scalars>` type that also has two fields: + + >>> x = np.array([('Sarah', (8.0, 7.0)), ('John', (6.0, 7.0))], dtype=dt) + >>> x[1] + ('John', [6.0, 7.0]) + >>> x[1]['grades'] + array([ 6., 7.]) + >>> type(x[1]) + <type 'numpy.void'> + >>> type(x[1]['grades']) + <type 'numpy.ndarray'> + +.. _arrays.dtypes.constructing: + +Specifying and constructing data types +====================================== + +Whenever a data-type is required in a NumPy function or method, either +a :class:`dtype` object or something that can be converted to one can +be supplied. Such conversions are done by the :class:`dtype` +constructor: + +.. autosummary:: + :toctree: generated/ + + dtype + +What can be converted to a data-type object is described below: + +:class:`dtype` object + + .. index:: + triple: dtype; construction; from dtype + + Used as-is. + +:const:`None` + + .. index:: + triple: dtype; construction; from None + + The default data type: :class:`float_`. + +.. index:: + triple: dtype; construction; from type + +Array-scalar types + + The 21 built-in :ref:`array scalar type objects + <arrays.scalars.built-in>` all convert to an associated data-type object. + This is true for their sub-classes as well. + + Note that not all data-type information can be supplied with a + type-object: for example, :term:`flexible` data-types have + a default *itemsize* of 0, and require an explicitly given size + to be useful. + + .. 
admonition:: Example + + >>> dt = np.dtype(np.int32) # 32-bit integer + >>> dt = np.dtype(np.complex128) # 128-bit complex floating-point number + +Generic types + + The generic hierarchical type objects convert to corresponding + type objects according to the associations: + + ===================================================== =============== + :class:`number`, :class:`inexact`, :class:`floating` :class:`float` + :class:`complexfloating` :class:`cfloat` + :class:`integer`, :class:`signedinteger` :class:`int\_` + :class:`unsignedinteger` :class:`uint` + :class:`character` :class:`string` + :class:`generic`, :class:`flexible` :class:`void` + ===================================================== =============== + +Built-in Python types + + Several python types are equivalent to a corresponding + array scalar when used to generate a :class:`dtype` object: + + ================ =============== + :class:`int` :class:`int\_` + :class:`bool` :class:`bool\_` + :class:`float` :class:`float\_` + :class:`complex` :class:`cfloat` + :class:`str` :class:`string` + :class:`unicode` :class:`unicode\_` + :class:`buffer` :class:`void` + (all others) :class:`object_` + ================ =============== + + .. admonition:: Example + + >>> dt = np.dtype(float) # Python-compatible floating-point number + >>> dt = np.dtype(int) # Python-compatible integer + >>> dt = np.dtype(object) # Python object + +Types with ``.dtype`` + + Any type object with a ``dtype`` attribute: The attribute will be + accessed and used directly. The attribute must return something + that is convertible into a dtype object. + +.. index:: + triple: dtype; construction; from string + +Several kinds of strings can be converted. Recognized strings can be +prepended with ``'>'`` (:term:`big-endian`), ``'<'`` +(:term:`little-endian`), or ``'='`` (hardware-native, the default), to +specify the byte order. 
+
+One-character strings
+
+    Each built-in data-type has a character code
+    (the updated Numeric typecodes), that uniquely identifies it.
+
+    .. admonition:: Example
+
+       >>> dt = np.dtype('b')  # byte, native byte order
+       >>> dt = np.dtype('>H') # big-endian unsigned short
+       >>> dt = np.dtype('<f') # little-endian single-precision float
+       >>> dt = np.dtype('d')  # double-precision floating-point number
+
+Array-protocol type strings (see :ref:`arrays.interface`)
+
+    The first character specifies the kind of data and the remaining
+    characters specify how many bytes of data. The supported kinds are
+
+    ================ ========================
+    ``'b'``          Boolean
+    ``'i'``          (signed) integer
+    ``'u'``          unsigned integer
+    ``'f'``          floating-point
+    ``'c'``          complex-floating point
+    ``'S'``, ``'a'`` string
+    ``'U'``          unicode
+    ``'V'``          anything (:class:`void`)
+    ================ ========================
+
+    .. admonition:: Example
+
+       >>> dt = np.dtype('i4')   # 32-bit signed integer
+       >>> dt = np.dtype('f8')   # 64-bit floating-point number
+       >>> dt = np.dtype('c16')  # 128-bit complex floating-point number
+       >>> dt = np.dtype('a25')  # 25-character string
+
+String with comma-separated fields
+
+    Numarray introduced a short-hand notation for specifying the format
+    of a record as a comma-separated string of basic formats.
+
+    A basic format in this context is an optional shape specifier
+    followed by an array-protocol type string. Parenthesis are required
+    on the shape if it is greater than 1-d. NumPy allows a modification
+    on the format in that any string that can uniquely identify the
+    type can be used to specify the data-type in a field.
+    The generated data-type fields are named ``'f0'``, ``'f1'``, ...,
+    ``'f<N-1>'`` where N (>1) is the number of comma-separated basic
+    formats in the string. If the optional shape specifier is provided,
+    then the data-type for the corresponding field describes a sub-array.
+
+..
admonition:: Example + + - field named ``f0`` containing a 32-bit integer + - field named ``f1`` containing a 2 x 3 sub-array + of 64-bit floating-point numbers + - field named ``f2`` containing a 32-bit floating-point number + + >>> dt = np.dtype("i4, (2,3)f8, f4") + + - field named ``f0`` containing a 3-character string + - field named ``f1`` containing a sub-array of shape (3,) + containing 64-bit unsigned integers + - field named ``f2`` containing a 3 x 4 sub-array + containing 10-character strings + + >>> dt = np.dtype("a3, 3u8, (3,4)a10") + +Type strings + + Any string in :obj:`numpy.sctypeDict`.keys(): + + .. admonition:: Example + + >>> dt = np.dtype('uint32') # 32-bit unsigned integer + >>> dt = np.dtype('Float64') # 64-bit floating-point number + +.. index:: + triple: dtype; construction; from tuple + +``(flexible_dtype, itemsize)`` + + The first argument must be an object that is converted to a + flexible data-type object (one whose element size is 0), the + second argument is an integer providing the desired itemsize. + + .. admonition:: Example + + >>> dt = np.dtype((void, 10)) # 10-byte wide data block + >>> dt = np.dtype((str, 35)) # 35-character string + >>> dt = np.dtype(('U', 10)) # 10-character unicode string + +``(fixed_dtype, shape)`` + + .. index:: + pair: dtype; sub-array + + The first argument is any object that can be converted into a + fixed-size data-type object. The second argument is the desired + shape of this type. If the shape parameter is 1, then the + data-type object is equivalent to fixed dtype. If *shape* is a + tuple, then the new dtype defines a sub-array of the given shape. + + .. admonition:: Example + + >>> dt = np.dtype((np.int32, (2,2))) # 2 x 2 integer sub-array + >>> dt = np.dtype(('S10', 1)) # 10-character string + >>> dt = np.dtype(('i4, (2,3)f8, f4', (2,3))) # 2 x 3 record sub-array + +``(base_dtype, new_dtype)`` + + Both arguments must be convertible to data-type objects in this + case. 
The *base_dtype* is the data-type object that the new
+    data-type builds on. This is how you could assign named fields to
+    any built-in data-type object.
+
+    .. admonition:: Example
+
+       32-bit integer, whose first two bytes are interpreted as an integer
+       via field ``real``, and the following two bytes via field ``imag``.
+
+       >>> dt = np.dtype((np.int32, {'real': (np.int16, 0), 'imag': (np.int16, 2)}))
+
+       32-bit integer, which is interpreted as consisting of a sub-array
+       of shape ``(4,)`` containing 8-bit integers:
+
+       >>> dt = np.dtype((np.int32, (np.int8, 4)))
+
+       32-bit integer, containing fields ``r``, ``g``, ``b``, ``a`` that
+       interpret the 4 bytes in the integer as four unsigned integers:
+
+       >>> dt = np.dtype(('i4', [('r','u1'),('g','u1'),('b','u1'),('a','u1')]))
+
+.. note:: XXX: does the second-to-last example above make sense?
+
+.. index::
+   triple: dtype; construction; from list
+
+``[(field_name, field_dtype, field_shape), ...]``
+
+    *obj* should be a list of fields where each field is described by a
+    tuple of length 2 or 3. (Equivalent to the ``descr`` item in the
+    :obj:`__array_interface__` attribute.)
+
+    The first element, *field_name*, is the field name (if this is
+    ``''`` then a standard field name, ``'f#'``, is assigned). The
+    field name may also be a 2-tuple of strings where the first string
+    is either a "title" (which may be any string or unicode string) or
+    meta-data for the field which can be any object, and the second
+    string is the "name" which must be a valid Python identifier.
+
+    The second element, *field_dtype*, can be anything that can be
+    interpreted as a data-type.
+
+    The optional third element *field_shape* contains the shape if this
+    field represents an array of the data-type in the second
+    element. Note that a 3-tuple with a third argument equal to 1 is
+    equivalent to a 2-tuple.
+ + This style does not accept *align* in the :class:`dtype` + constructor as it is assumed that all of the memory is accounted + for by the array interface description. + + .. admonition:: Example + + Data-type with fields ``big`` (big-endian 32-bit integer) and + ``little`` (little-endian 32-bit integer): + + >>> dt = np.dtype([('big', '>i4'), ('little', '<i4')]) + + Data-type with fields ``R``, ``G``, ``B``, ``A``, each being an + unsigned 8-bit integer: + + >>> dt = np.dtype([('R','u1'), ('G','u1'), ('B','u1'), ('A','u1')]) + +.. index:: + triple: dtype; construction; from dict + +``{'names': ..., 'formats': ..., 'offsets': ..., 'titles': ...}`` + + This style has two required and two optional keys. The *names* + and *formats* keys are required. Their respective values are + equal-length lists with the field names and the field formats. + The field names must be strings and the field formats can be any + object accepted by :class:`dtype` constructor. + + The optional keys in the dictionary are *offsets* and *titles* and + their values must each be lists of the same length as the *names* + and *formats* lists. The *offsets* value is a list of byte offsets + (integers) for each field, while the *titles* value is a list of + titles for each field (:const:`None` can be used if no title is + desired for that field). The *titles* can be any :class:`string` + or :class:`unicode` object and will add another entry to the + fields dictionary keyed by the title and referencing the same + field tuple which will contain the title as an additional tuple + member. + + .. admonition:: Example + + Data type with fields ``r``, ``g``, ``b``, ``a``, each being + a 8-bit unsigned integer: + + >>> dt = np.dtype({'names': ['r','g','b','a'], + ... 
'formats': [uint8, uint8, uint8, uint8]}) + + Data type with fields ``r`` and ``b`` (with the given titles), + both being 8-bit unsigned integers, the first at byte position + 0 from the start of the field and the second at position 2: + + >>> dt = np.dtype({'names': ['r','b'], 'formats': ['u1', 'u1'], + ... 'offsets': [0, 2], + ... 'titles': ['Red pixel', 'Blue pixel']}) + + +``{'field1': ..., 'field2': ..., ...}`` + + This style allows passing in the :attr:`fields <dtype.fields>` + attribute of a data-type object. + + *obj* should contain string or unicode keys that refer to + ``(data-type, offset)`` or ``(data-type, offset, title)`` tuples. + + .. admonition:: Example + + Data type containing field ``col1`` (10-character string at + byte position 0), ``col2`` (32-bit float at byte position 10), + and ``col3`` (integers at byte position 14): + + >>> dt = np.dtype({'col1': ('S10', 0), 'col2': (float32, 10), 'col3': (int, 14)}) + + +:class:`dtype` +============== + +Numpy data type descriptions are instances of the :class:`dtype` class. + +Attributes +---------- + +The type of the data is described by the following :class:`dtype` attributes: + +.. autosummary:: + :toctree: generated/ + + dtype.type + dtype.kind + dtype.char + dtype.num + dtype.str + +Size of the data is in turn described by: + +.. autosummary:: + :toctree: generated/ + + dtype.name + dtype.itemsize + +Endianness of this data: + +.. autosummary:: + :toctree: generated/ + + dtype.byteorder + +Information about sub-data-types in a :term:`record`: + +.. autosummary:: + :toctree: generated/ + + dtype.fields + dtype.names + +For data types that describe sub-arrays: + +.. autosummary:: + :toctree: generated/ + + dtype.subdtype + dtype.shape + +Attributes providing additional information: + +.. 
autosummary:: + :toctree: generated/ + + dtype.hasobject + dtype.flags + dtype.isbuiltin + dtype.isnative + dtype.descr + dtype.alignment + + +Methods +------- + +Data types have the following method for changing the byte order: + +.. autosummary:: + :toctree: generated/ + + dtype.newbyteorder + +The following methods implement the pickle protocol: + +.. autosummary:: + :toctree: generated/ + + dtype.__reduce__ + dtype.__setstate__ diff --git a/doc/source/reference/arrays.indexing.rst b/doc/source/reference/arrays.indexing.rst new file mode 100644 index 000000000..000a06def --- /dev/null +++ b/doc/source/reference/arrays.indexing.rst @@ -0,0 +1,375 @@ +.. _arrays.indexing: + +Indexing +======== + +.. sectionauthor:: adapted from "Guide to Numpy" by Travis E. Oliphant + +.. currentmodule:: numpy + +.. index:: indexing, slicing + +:class:`ndarrays <ndarray>` can be indexed using the standard Python +``x[obj]`` syntax, where *x* is the array and *obj* the selection. +There are three kinds of indexing available: record access, basic +slicing, advanced indexing. Which one occurs depends on *obj*. + +.. note:: + + In Python, ``x[(exp1, exp2, ..., expN)]`` is equivalent to + ``x[exp1, exp2, ..., expN]``; the latter is just syntactic sugar + for the former. + + +Basic Slicing +------------- + +Basic slicing extends Python's basic concept of slicing to N +dimensions. Basic slicing occurs when *obj* is a :class:`slice` object +(constructed by ``start:stop:step`` notation inside of brackets), an +integer, or a tuple of slice objects and integers. :const:`Ellipsis` +and :const:`newaxis` objects can be interspersed with these as +well. In order to remain backward compatible with a common usage in +Numeric, basic slicing is also initiated if the selection object is +any sequence (such as a :class:`list`) containing :class:`slice` +objects, the :const:`Ellipsis` object, or the :const:`newaxis` object, +but no integer arrays or other embedded sequences. + +.. 
index:: + triple: ndarray; special methods; getslice + triple: ndarray; special methods; setslice + single: ellipsis + single: newaxis + +The simplest case of indexing with *N* integers returns an :ref:`array +scalar <arrays.scalars>` representing the corresponding item. As in +Python, all indices are zero-based: for the *i*-th index :math:`n_i`, +the valid range is :math:`0 \le n_i < d_i` where :math:`d_i` is the +*i*-th element of the shape of the array. Negative indices are +interpreted as counting from the end of the array (*i.e.*, if *i < 0*, +it means :math:`n_i + i`). + + +All arrays generated by basic slicing are always :term:`views <view>` +of the original array. + +The standard rules of sequence slicing apply to basic slicing on a +per-dimension basis (including using a step index). Some useful +concepts to remember include: + +- The basic slice syntax is ``i:j:k`` where *i* is the starting index, + *j* is the stopping index, and *k* is the step (:math:`k\neq0`). + This selects the *m* elements (in the corresponding dimension) with + index values *i*, *i + k*, ..., *i + (m - 1) k* where + :math:`m = q + (r\neq0)` and *q* and *r* are the quotient and remainder + obtained by dividing *j - i* by *k*: *j - i = q k + r*, so that + *i + (m - 1) k < j*. + + .. admonition:: Example + + >>> x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + >>> x[1:7:2] + array([1, 3, 5]) + +- Negative *i* and *j* are interpreted as *n + i* and *n + j* where + *n* is the number of elements in the corresponding dimension. + Negative *k* makes stepping go towards smaller indices. + + .. admonition:: Example + + >>> x[-2:10] + array([8, 9]) + >>> x[-3:3:-1] + array([7, 6, 5, 4]) + +- Assume *n* is the number of elements in the dimension being + sliced. Then, if *i* is not given it defaults to 0 for *k > 0* and + *n* for *k < 0* . If *j* is not given it defaults to *n* for *k > 0* + and -1 for *k < 0* . If *k* is not given it defaults to 1. 
Note that + ``::`` is the same as ``:`` and means select all indices along this + axis. + + .. admonition:: Example + + >>> x[5:] + array([5, 6, 7, 8, 9]) + +- If the number of objects in the selection tuple is less than + *N* , then ``:`` is assumed for any subsequent dimensions. + + .. admonition:: Example + + >>> x = np.array([[[1],[2],[3]], [[4],[5],[6]]]) + >>> x.shape + (2, 3, 1) + >>> x[1:2] + array([[[4], + [5], + [6]]]) + +- :const:`Ellipsis` expand to the number of ``:`` objects needed to + make a selection tuple of the same length as ``x.ndim``. Only the + first ellipsis is expanded, any others are interpreted as ``:``. + + .. admonition:: Example + + >>> x[...,0] + array([[1, 2, 3], + [4, 5, 6]]) + +- Each :const:`newaxis` object in the selection tuple serves to expand + the dimensions of the resulting selection by one unit-length + dimension. The added dimension is the position of the :const:`newaxis` + object in the selection tuple. + + .. admonition:: Example + + >>> x[:,np.newaxis,:,:].shape + (2, 1, 3, 1) + +- An integer, *i*, returns the same values as ``i:i+1`` + **except** the dimensionality of the returned object is reduced by + 1. In particular, a selection tuple with the *p*-th + element an integer (and all other entries ``:``) returns the + corresponding sub-array with dimension *N - 1*. If *N = 1* + then the returned object is an array scalar. These objects are + explained in :ref:`arrays.scalars`. + +- If the selection tuple has all entries ``:`` except the + *p*-th entry which is a slice object ``i:j:k``, + then the returned array has dimension *N* formed by + concatenating the sub-arrays returned by integer indexing of + elements *i*, *i+k*, ..., *i + (m - 1) k < j*, + +- Basic slicing with more than one non-``:`` entry in the slicing + tuple, acts like repeated application of slicing using a single + non-``:`` entry, where the non-``:`` entries are successively taken + (with all other non-``:`` entries replaced by ``:``). 
Thus,
+  ``x[ind1,...,ind2,:]`` acts like ``x[ind1][...,ind2,:]`` under basic
+  slicing.
+
+  .. warning:: The above is **not** true for advanced slicing.
+
+- You may use slicing to set values in the array, but (unlike lists) you
+  can never grow the array. The size of the value to be set in
+  ``x[obj] = value`` must be broadcastable to the same shape as
+  ``x[obj]``.
+
+.. index::
+   pair: ndarray; view
+
+.. note::
+
+   Remember that a slicing tuple can always be constructed as *obj*
+   and used in the ``x[obj]`` notation. Slice objects can be used in
+   the construction in place of the ``[start:stop:step]``
+   notation. For example, ``x[1:10:5,::-1]`` can also be implemented
+   as ``obj = (slice(1,10,5), slice(None,None,-1)); x[obj]``. This
+   can be useful for constructing generic code that works on arrays
+   of arbitrary dimension.
+
+.. data:: newaxis
+
+   The :const:`newaxis` object can be used in the basic slicing syntax
+   discussed above. :const:`None` can also be used instead of
+   :const:`newaxis`.
+
+
+Advanced indexing
+-----------------
+
+Advanced indexing is triggered when the selection object, *obj*, is a
+non-tuple sequence object, an :class:`ndarray` (of data type integer or bool),
+or a tuple with at least one sequence object or ndarray (of data type
+integer or bool). There are two types of advanced indexing: integer
+and Boolean.
+
+Advanced indexing always returns a *copy* of the data (contrast with
+basic slicing that returns a :term:`view`).
+
+Integer
+^^^^^^^
+
+Integer indexing allows selection of arbitrary items in the array
+based on their *N*-dimensional index. This kind of selection occurs
+when advanced indexing is triggered and the selection object is not
+an array of data type bool. For the discussion below, when the
+selection object is not a tuple, it will be referred to as if it had
+been promoted to a 1-tuple, which will be called the selection
+tuple.
The rules of advanced integer-style indexing are: + +- If the length of the selection tuple is larger than *N* an error is raised. + +- All sequences and scalars in the selection tuple are converted to + :class:`intp` indexing arrays. + +- All selection tuple objects must be convertible to :class:`intp` + arrays, :class:`slice` objects, or the :const:`Ellipsis` object. + +- The first :const:`Ellipsis` object will be expanded, and any other + :const:`Ellipsis` objects will be treated as full slice (``:``) + objects. The expanded :const:`Ellipsis` object is replaced with as + many full slice (``:``) objects as needed to make the length of the + selection tuple :math:`N`. + +- If the selection tuple is smaller than *N*, then as many ``:`` + objects as needed are added to the end of the selection tuple so + that the modified selection tuple has length *N*. + +- All the integer indexing arrays must be :ref:`broadcastable + <arrays.broadcasting.broadcastable>` to the same shape. + +- The shape of the output (or the needed shape of the object to be used + for setting) is the broadcasted shape. + +- After expanding any ellipses and filling out any missing ``:`` + objects in the selection tuple, then let :math:`N_t` be the number + of indexing arrays, and let :math:`N_s = N - N_t` be the number of + slice objects. Note that :math:`N_t > 0` (or we wouldn't be doing + advanced integer indexing). + +- If :math:`N_s = 0` then the *M*-dimensional result is constructed by + varying the index tuple ``(i_1, ..., i_M)`` over the range + of the result shape and for each value of the index tuple + ``(ind_1, ..., ind_M)``:: + + result[i_1, ..., i_M] == x[ind_1[i_1, ..., i_M], ind_2[i_1, ..., i_M], + ..., ind_N[i_1, ..., i_M]] + + .. admonition:: Example + + Suppose the shape of the broadcasted indexing arrays is 3-dimensional + and *N* is 2. 
Then the result is found by letting *i, j, k* run over + the shape found by broadcasting ``ind_1`` and ``ind_2``, and each + *i, j, k* yields:: + + result[i,j,k] = x[ind_1[i,j,k], ind_2[i,j,k]] + +- If :math:`N_s > 0`, then partial indexing is done. This can be + somewhat mind-boggling to understand, but if you think in terms of + the shapes of the arrays involved, it can be easier to grasp what + happens. In simple cases (*i.e.* one indexing array and *N - 1* slice + objects) it does exactly what you would expect (concatenation of + repeated application of basic slicing). The rule for partial + indexing is that the shape of the result (or the interpreted shape + of the object to be used in setting) is the shape of *x* with the + indexed subspace replaced with the broadcasted indexing subspace. If + the index subspaces are right next to each other, then the + broadcasted indexing space directly replaces all of the indexed + subspaces in *x*. If the indexing subspaces are separated (by slice + objects), then the broadcasted indexing space is first, followed by + the sliced subspace of *x*. + + .. admonition:: Example + + Suppose ``x.shape`` is (10,20,30) and ``ind`` is a (2,3,4)-shaped + indexing :class:`intp` array, then ``result = x[...,ind,:]`` has + shape (10,2,3,4,30) because the (20,)-shaped subspace has been + replaced with a (2,3,4)-shaped broadcasted indexing subspace. If + we let *i, j, k* loop over the (2,3,4)-shaped subspace then + ``result[...,i,j,k,:] = x[...,ind[i,j,k],:]``. This example + produces the same result as :meth:`x.take(ind, axis=-2) <ndarray.take>`. + + .. admonition:: Example + + Now let ``x.shape`` be (10,20,30,40,50) and suppose ``ind_1`` + and ``ind_2`` are broadcastable to the shape (2,3,4). Then + ``x[:,ind_1,ind_2]`` has shape (10,2,3,4,40,50) because the + (20,30)-shaped subspace from X has been replaced with the + (2,3,4) subspace from the indices. 
However,
+ ``x[:,ind_1,:,ind_2]`` has shape (2,3,4,10,30,50) because there
+ is no unambiguous place to drop in the indexing subspace, thus
+ it is tacked-on to the beginning. It is always possible to use
+ :meth:`.transpose() <ndarray.transpose>` to move the subspace
+ anywhere desired. (Note that this example cannot be replicated
+ using :func:`take`.)
+
+
+Boolean
+^^^^^^^
+
+This advanced indexing occurs when obj is an array object of Boolean
+type (such as may be returned from comparison operators). It is always
+equivalent to (but faster than) ``x[obj.nonzero()]`` where, as
+described above, :meth:`obj.nonzero() <ndarray.nonzero>` returns a
+tuple (of length :attr:`obj.ndim <ndarray.ndim>`) of integer index
+arrays showing the :const:`True` elements of *obj*.
+
+The special case when ``obj.ndim == x.ndim`` is worth mentioning. In
+this case ``x[obj]`` returns a 1-dimensional array filled with the
+elements of *x* corresponding to the :const:`True` values of *obj*.
+The search order will be C-style (last index varies the fastest). If
+*obj* has :const:`True` values at entries that are outside of the
+bounds of *x*, then an index error will be raised.
+
+You can also use Boolean arrays as elements of the selection tuple. In
+such instances, they will always be interpreted as :meth:`nonzero(obj)
+<ndarray.nonzero>` and the equivalent integer indexing will be
+done.
+
+.. warning::
+
+ The definition of advanced indexing means that ``x[(1,2,3),]`` is
+ fundamentally different than ``x[(1,2,3)]``. The latter is
+ equivalent to ``x[1,2,3]`` which will trigger basic selection while
+ the former will trigger advanced indexing. Be sure to understand
+ why this occurs.
+
+ Also recognize that ``x[[1,2,3]]`` will trigger advanced indexing,
+ whereas ``x[[1,2,slice(None)]]`` will trigger basic slicing.
+
+.. note::
+
+ XXX: this section may need some tuning...
+ Also the above warning needs explanation as the last part is at odds + with the definition of basic indexing. + + +.. _arrays.indexing.rec: + +Record Access +------------- + +.. seealso:: :ref:`arrays.dtypes`, :ref:`arrays.scalars` + +If the :class:`ndarray` object is a record array, *i.e.* its data type +is a :term:`record` data type, the :term:`fields <field>` of the array +can be accessed by indexing the array with strings, dictionary-like. + +Indexing ``x['field-name']`` returns a new :term:`view` to the array, +which is of the same shape as *x* (except when the field is a +sub-array) but of data type ``x.dtype['field-name']`` and contains +only the part of the data in the specified field. Also record array +scalars can be "indexed" this way. + +If the accessed field is a sub-array, the dimensions of the sub-array +are appended to the shape of the result. + +.. admonition:: Example + + >>> x = np.zeros((2,2), dtype=[('a', np.int32), ('b', np.float64, (3,3))]) + >>> x['a'].shape + (2, 2) + >>> x['a'].dtype + dtype('int32') + >>> x['b'].shape + (2, 2, 3, 3) + >>> x['b'].dtype + dtype('float64') + + +Flat Iterator indexing +---------------------- + +:attr:`x.flat <ndarray.flat>` returns an iterator that will iterate +over the entire array (in C-contiguous style with the last index +varying the fastest). This iterator object can also be indexed using +basic slicing or advanced indexing as long as the selection object is +not a tuple. This should be clear from the fact that :attr:`x.flat +<ndarray.flat>` is a 1-dimensional view. It can be used for integer +indexing with 1-dimensional C-style-flat indices. The shape of any +returned array is therefore the shape of the integer indexing object. + +.. 
index:: + single: indexing + single: ndarray diff --git a/doc/source/reference/arrays.interface.rst b/doc/source/reference/arrays.interface.rst new file mode 100644 index 000000000..62f146bc1 --- /dev/null +++ b/doc/source/reference/arrays.interface.rst @@ -0,0 +1,210 @@ +.. index:: + pair: array; interface + pair: array; protocol + +.. _arrays.interface: + +******************* +The Array Interface +******************* + +:version: 3 + +The array interface (sometimes called array protocol) was created in +2005 as a means for array-like Python objects to re-use each other's +data buffers intelligently whenever possible. The homogeneous +N-dimensional array interface is a default mechanism for objects to +share N-dimensional array memory and information. The interface +consists of a Python-side and a C-side using two attributes. Objects +wishing to be considered an N-dimensional array in application code +should support at least one of these attributes. Objects wishing to +support an N-dimensional array in application code should look for at +least one of these attributes and use the information provided +appropriately. + +This interface describes homogeneous arrays in the sense that each +item of the array has the same "type". This type can be very simple +or it can be a quite arbitrary and complicated C-like structure. + +There are two ways to use the interface: A Python side and a C-side. +Both are separate attributes. + +.. note:: + + An alternative to the array interface; + :cfunc:`The Revised Buffer Protocol <PyObject_GetBuffer>`, :pep:`3118` + is introduced in Python 2.6. + +Python side +=========== + +This approach to the interface consists of the object having an +:data:`__array_interface__` attribute. + +.. data:: __array_interface__ + + A dictionary of items (3 required and 5 optional). The optional + keys in the dictionary have implied defaults if they are not + provided. 
+
+ The keys are:
+
+ **shape** (required)
+
+ Tuple whose elements are the array size in each dimension. Each
+ entry is an integer (a Python int or long). Note that these
+ integers could be larger than the platform "int" or "long"
+ could hold (a Python int is a C long). It is up to the code
+ using this attribute to handle this appropriately; either by
+ raising an error when overflow is possible, or by using
+ :cdata:`Py_LONG_LONG` as the C type for the shapes.
+
+ **typestr** (required)
+
+ A string providing the basic type of the homogeneous array. The
+ basic string format consists of 3 parts: a character describing
+ the byteorder of the data (``<``: little-endian, ``>``:
+ big-endian, ``|``: not-relevant), a character code giving the
+ basic type of the array, and an integer providing the number of
+ bytes the type uses.
+
+ The basic type character codes are:
+
+ ===== ================================================================
+ ``t`` Bit field (following integer gives the number of
+ bits in the bit field).
+ ``b`` Boolean (integer type where all values are only True or False)
+ ``i`` Integer
+ ``u`` Unsigned integer
+ ``f`` Floating point
+ ``c`` Complex floating point
+ ``O`` Object (i.e. the memory contains a pointer to :ctype:`PyObject`)
+ ``S`` String (fixed-length sequence of char)
+ ``U`` Unicode (fixed-length sequence of :ctype:`Py_UNICODE`)
+ ``V`` Other (void \* -- each item is a fixed-size chunk of memory)
+ ===== ================================================================
+
+ **descr** (optional)
+
+ A list of tuples providing a more detailed description of the
+ memory layout for each item in the homogeneous array. Each
+ tuple in the list has two or three elements. Normally, this
+ attribute would be used when *typestr* is ``V[0-9]+``, but this is
+ not a requirement. The only requirement is that the number of
+ bytes represented in the *typestr* key is the same as the total
+ number of bytes represented here.
The idea is to support + descriptions of C-like structs (records) that make up array + elements. The elements of each tuple in the list are + + 1. A string providing a name associated with this portion of + the record. This could also be a tuple of ``('full name', + 'basic_name')`` where basic name would be a valid Python + variable name representing the full name of the field. + + 2. Either a basic-type description string as in *typestr* or + another list (for nested records) + + 3. An optional shape tuple providing how many times this part + of the record should be repeated. No repeats are assumed + if this is not given. Very complicated structures can be + described using this generic interface. Notice, however, + that each element of the array is still of the same + data-type. Some examples of using this interface are given + below. + + **Default**: ``[('', typestr)]`` + + **data** (optional) + + A 2-tuple whose first argument is an integer (a long integer + if necessary) that points to the data-area storing the array + contents. This pointer must point to the first element of + data (in other words any offset is always ignored in this + case). The second entry in the tuple is a read-only flag (true + means the data area is read-only). + + This attribute can also be an object exposing the + :cfunc:`buffer interface <PyObject_AsCharBuffer>` which + will be used to share the data. If this key is not present (or + returns :class:`None`), then memory sharing will be done + through the buffer interface of the object itself. In this + case, the offset key can be used to indicate the start of the + buffer. A reference to the object exposing the array interface + must be stored by the new object if the memory area is to be + secured. 
+
+ **Default**: :const:`None`
+
+ **strides** (optional)
+
+ Either :const:`None` to indicate a C-style contiguous array or
+ a Tuple of strides which provides the number of bytes needed
+ to jump to the next array element in the corresponding
+ dimension. Each entry must be an integer (a Python
+ :const:`int` or :const:`long`). As with shape, the values may
+ be larger than can be represented by a C "int" or "long"; the
+ calling code should handle this appropriately, either by
+ raising an error, or by using :ctype:`Py_LONG_LONG` in C. The
+ default is :const:`None` which implies a C-style contiguous
+ memory buffer. In this model, the last dimension of the array
+ varies the fastest. For example, the default strides tuple
+ for an object whose array entries are 8 bytes long and whose
+ shape is (10,20,30) would be (4800, 240, 8).
+
+ **Default**: :const:`None` (C-style contiguous)
+
+ **mask** (optional)
+
+ :const:`None` or an object exposing the array interface. All
+ elements of the mask array should be interpreted only as true
+ or not true indicating which elements of this array are valid.
+ The shape of this object should be :ref:`broadcastable
+ <arrays.broadcasting.broadcastable>` to the shape of the
+ original array.
+
+ **Default**: :const:`None` (All array values are valid)
+
+ **offset** (optional)
+
+ An integer offset into the array data region. This can only be
+ used when data is :const:`None` or returns a :class:`buffer`
+ object.
+
+ **Default**: 0.
+
+ **version** (required)
+
+ An integer showing the version of the interface (i.e. 3 for
+ this version). Be careful not to use this to invalidate
+ objects exposing future versions of the interface.
+
+
+C-struct access
+===============
+
+This approach to the array interface allows for faster access to an
+array using only one attribute lookup and a well-defined C-structure.
+
+..
cvar:: __array_struct__ + + A :ctype:`PyCObject` whose :cdata:`voidptr` member contains a + pointer to a filled :ctype:`PyArrayInterface` structure. Memory + for the structure is dynamically created and the :ctype:`PyCObject` + is also created with an appropriate destructor so the retriever of + this attribute simply has to apply :cfunc:`Py_DECREF()` to the + object returned by this attribute when it is finished. Also, + either the data needs to be copied out, or a reference to the + object exposing this attribute must be held to ensure the data is + not freed. Objects exposing the :obj:`__array_struct__` interface + must also not reallocate their memory if other objects are + referencing them. + +.. admonition:: New since June 16, 2006: + + In the past most implementations used the "desc" member of the + :ctype:`PyCObject` itself (do not confuse this with the "descr" member of + the :ctype:`PyArrayInterface` structure above --- they are two separate + things) to hold the pointer to the object exposing the interface. + This is now an explicit part of the interface. Be sure to own a + reference to the object when the :ctype:`PyCObject` is created using + :ctype:`PyCObject_FromVoidPtrAndDesc`. diff --git a/doc/source/reference/arrays.ndarray.rst b/doc/source/reference/arrays.ndarray.rst new file mode 100644 index 000000000..f07199603 --- /dev/null +++ b/doc/source/reference/arrays.ndarray.rst @@ -0,0 +1,529 @@ +.. _arrays.ndarray: + +****************************************** +The N-dimensional array (:class:`ndarray`) +****************************************** + +.. currentmodule:: numpy + +An :class:`ndarray` is a (usually fixed-size) multidimensional +container of items of the same type and size. The number of dimensions +and items in an array is defined by its :attr:`shape <ndarray.shape>`, +which is a :class:`tuple` of *N* integers that specify the sizes of +each dimension. 
The type of items in the array is specified by a +separate :ref:`data-type object (dtype) <arrays.dtypes>`, one of which +is associated with each ndarray. + +As with other container objects in Python, the contents of a +:class:`ndarray` can be accessed and modified by :ref:`indexing or +slicing <arrays.indexing>` the array (using for example *N* integers), +and via the methods and attributes of the :class:`ndarray`. + +.. index:: view, base + +Different :class:`ndarrays <ndarray>` can share the same data, so that +changes made in one :class:`ndarray` may be visible in another. That +is, an ndarray can be a *"view"* to another ndarray, and the data it +is referring to is taken care of by the *"base"* ndarray. ndarrays can +also be views to memory owned by Python :class:`strings <str>` or +objects implementing the :class:`buffer` or :ref:`array +<arrays.interface>` interfaces. + + +.. admonition:: Example + + A 2-dimensional array of size 2 x 3, composed of 4-byte integer elements: + + >>> x = np.array([[1, 2, 3], [4, 5, 6]], np.int32) + >>> type(x) + <type 'numpy.ndarray'> + >>> x.shape + (2, 3) + >>> x.dtype + dtype('int32') + + The array can be indexed using a Python container-like syntax: + + >>> x[1,2] + 6 + + For example :ref:`slicing <arrays.indexing>` can produce views of the array: + + >>> y = x[:,1] + >>> y[0] = 9 + >>> x + array([[1, 9, 3], + [4, 5, 6]]) + + +Constructing arrays +=================== + +New arrays can be constructed using the routines detailed in +:ref:`routines.array-creation`, and also by using the low-level +:class:`ndarray` constructor: + +.. autosummary:: + :toctree: generated/ + + ndarray + +.. _arrays.ndarray.indexing: + + +Indexing arrays +=============== + +Arrays can be indexed using an extended Python slicing syntax, +``array[selection]``. Similar syntax is also used for accessing +fields in a :ref:`record array <arrays.dtypes>`. + +.. seealso:: :ref:`Array Indexing <arrays.indexing>`. 
+ +Internal memory layout of an ndarray +==================================== + +An instance of class :class:`ndarray` consists of a contiguous +one-dimensional segment of computer memory (owned by the array, or by +some other object), combined with an indexing scheme that maps *N* +integers into the location of an item in the block. The ranges in +which the indices can vary is specified by the :obj:`shape +<ndarray.shape>` of the array. How many bytes each item takes and how +the bytes are interpreted is defined by the :ref:`data-type object +<arrays.dtypes>` associated with the array. + +.. index:: C-order, Fortran-order, row-major, column-major, stride, offset + +A segment of memory is inherently 1-dimensional, and there are many +different schemes of arranging the items of an *N*-dimensional array to +a 1-dimensional block. Numpy is flexible, and :class:`ndarray` objects +can accommodate any *strided indexing scheme*. In a strided scheme, +the N-dimensional index :math:`(n_0, n_1, ..., n_{N-1})` corresponds +to the offset (in bytes) + +.. math:: n_{\mathrm{offset}} = \sum_{k=0}^{N-1} s_k n_k + +from the beginning of the memory block associated with the +array. Here, :math:`s_k` are integers which specify the :obj:`strides +<ndarray.strides>` of the array. The :term:`column-major` order (used +for example in the Fortran language and in *Matlab*) and +:term:`row-major` order (used in C) are special cases of the strided +scheme, and correspond to the strides: + +.. math:: + + s_k^{\mathrm{column}} = \prod_{j=0}^{k-1} d_j , \quad s_k^{\mathrm{row}} = \prod_{j=k+1}^{N-1} d_j . + +.. index:: single-segment, contiguous, non-contiguous + +Both the C and Fortran orders are :term:`contiguous`, *i.e.* +:term:`single-segment`, memory layouts, in which every part of the +memory block can be accessed by some combination of the indices. 
+
+Data in new :class:`ndarrays <ndarray>` is in the :term:`row-major`
+(C) order, unless otherwise specified, but for example :ref:`basic
+array slicing <arrays.indexing>` often produces :term:`views <view>`
+in a different scheme.
+
+.. seealso:: :ref:`Indexing <arrays.ndarray.indexing>`
+
+.. note::
+
+ Several algorithms in NumPy work on arbitrarily strided arrays.
+ However, some algorithms require single-segment arrays. When an
+ irregularly strided array is passed in to such algorithms, a copy
+ is automatically made.
+
+
+Array attributes
+================
+
+Array attributes reflect information that is intrinsic to the array
+itself. Generally, accessing an array through its attributes allows
+you to get and sometimes set intrinsic properties of the array without
+creating a new array. The exposed attributes are the core parts of an
+array and only some of them can be reset meaningfully without creating
+a new array. Information on each attribute is given below.
+
+Memory layout
+-------------
+
+The following attributes contain information about the memory layout
+of the array:
+
+.. autosummary::
+ :toctree: generated/
+
+ ndarray.flags
+ ndarray.shape
+ ndarray.strides
+ ndarray.ndim
+ ndarray.data
+ ndarray.size
+ ndarray.itemsize
+ ndarray.nbytes
+ ndarray.base
+
+.. note:: XXX: update and check these docstrings.
+
+Data type
+---------
+
+.. seealso:: :ref:`Data type objects <arrays.dtypes>`
+
+The data type object associated with the array can be found in the
+:attr:`dtype <ndarray.dtype>` attribute:
+
+.. autosummary::
+ :toctree: generated/
+
+ ndarray.dtype
+
+.. note:: XXX: update the dtype attribute docstring: setting etc.
+
+Other attributes
+----------------
+
+.. autosummary::
+ :toctree: generated/
+
+ ndarray.T
+ ndarray.real
+ ndarray.imag
+ ndarray.flat
+ ndarray.ctypes
+ __array_priority__
+
+
+.. _arrays.ndarray.array-interface:
+
+Array interface
+---------------
+
+.. seealso:: :ref:`arrays.interface`.
+ +========================== =================================== +:obj:`__array_interface__` Python-side of the array interface +:obj:`__array_struct__` C-side of the array interface +========================== =================================== + +:mod:`ctypes` foreign function interface +---------------------------------------- + +.. autosummary:: + :toctree: generated/ + + ndarray.ctypes + +.. note:: XXX: update and check these docstrings. + +Array methods +============= + +An :class:`ndarray` object has many methods which operate on or with +the array in some fashion, typically returning an array result. These +methods are explained below. + +For the following methods there are also corresponding functions in +:mod:`numpy`: :func:`all`, :func:`any`, :func:`argmax`, +:func:`argmin`, :func:`argsort`, :func:`choose`, :func:`clip`, +:func:`compress`, :func:`copy`, :func:`cumprod`, :func:`cumsum`, +:func:`diagonal`, :func:`imag`, :func:`max <amax>`, :func:`mean`, +:func:`min <amin>`, :func:`nonzero`, :func:`prod`, :func:`ptp`, :func:`put`, +:func:`ravel`, :func:`real`, :func:`repeat`, :func:`reshape`, +:func:`round <around>`, :func:`searchsorted`, :func:`sort`, :func:`squeeze`, +:func:`std`, :func:`sum`, :func:`swapaxes`, :func:`take`, +:func:`trace`, :func:`transpose`, :func:`var`. + +Array conversion +---------------- + +.. autosummary:: + :toctree: generated/ + + ndarray.item + ndarray.tolist + ndarray.itemset + ndarray.tostring + ndarray.tofile + ndarray.dump + ndarray.dumps + ndarray.astype + ndarray.byteswap + ndarray.copy + ndarray.view + ndarray.getfield + ndarray.setflags + ndarray.fill + +.. note:: XXX: update and check these docstrings. + +Shape manipulation +------------------ + +For reshape, resize, and transpose, the single tuple argument may be +replaced with ``n`` integers which will be interpreted as an n-tuple. + +.. 
autosummary:: + :toctree: generated/ + + ndarray.reshape + ndarray.resize + ndarray.transpose + ndarray.swapaxes + ndarray.flatten + ndarray.ravel + ndarray.squeeze + +Item selection and manipulation +------------------------------- + +For array methods that take an *axis* keyword, it defaults to +:const:`None`. If axis is *None*, then the array is treated as a 1-D +array. Any other value for *axis* represents the dimension along which +the operation should proceed. + +.. autosummary:: + :toctree: generated/ + + ndarray.take + ndarray.put + ndarray.repeat + ndarray.choose + ndarray.sort + ndarray.argsort + ndarray.searchsorted + ndarray.nonzero + ndarray.compress + ndarray.diagonal + +Calculation +----------- + +.. index:: axis + +Many of these methods take an argument named *axis*. In such cases, + +- If *axis* is *None* (the default), the array is treated as a 1-D + array and the operation is performed over the entire array. This + behavior is also the default if self is a 0-dimensional array or + array scalar. + +- If *axis* is an integer, then the operation is done over the given axis + (for each 1-D subarray that can be created along the given axis). + +The parameter *dtype* specifies the data type over which a reduction +operation (like summing) should take place. The default reduce data +type is the same as the data type of *self*. To avoid overflow, it can +be useful to perform the reduction using a larger data type. + +For several methods, an optional *out* argument can also be provided +and the result will be placed into the output array given. The *out* +argument must be an :class:`ndarray` and have the same number of +elements. It can have a different data type in which case casting will +be performed. + + +.. 
autosummary::
+ :toctree: generated/
+
+ ndarray.argmax
+ ndarray.min
+ ndarray.argmin
+ ndarray.ptp
+ ndarray.clip
+ ndarray.conj
+ ndarray.round
+ ndarray.trace
+ ndarray.sum
+ ndarray.cumsum
+ ndarray.mean
+ ndarray.var
+ ndarray.std
+ ndarray.prod
+ ndarray.cumprod
+ ndarray.all
+ ndarray.any
+
+Arithmetic and comparison operations
+====================================
+
+.. note:: XXX: write all attributes explicitly here instead of relying on
+ the auto\* stuff?
+
+.. index:: comparison, arithmetic, operation, operator
+
+Arithmetic and comparison operations on :class:`ndarrays <ndarray>`
+are defined as element-wise operations, and generally yield
+:class:`ndarray` objects as results.
+
+Each of the arithmetic operations (``+``, ``-``, ``*``, ``/``, ``//``,
+``%``, ``divmod()``, ``**`` or ``pow()``, ``<<``, ``>>``, ``&``,
+``^``, ``|``, ``~``) and the comparisons (``==``, ``<``, ``>``,
+``<=``, ``>=``, ``!=``) is equivalent to the corresponding
+:term:`universal function` (or :term:`ufunc` for short) in Numpy. For
+more information, see the section on :ref:`Universal Functions
+<ufuncs>`.
+
+Comparison operators:
+
+.. autosummary::
+ :toctree: generated/
+
+ ndarray.__lt__
+ ndarray.__le__
+ ndarray.__gt__
+ ndarray.__ge__
+ ndarray.__eq__
+ ndarray.__ne__
+
+Truth value of an array (:func:`bool()`):
+
+.. autosummary::
+ :toctree: generated/
+
+ ndarray.__nonzero__
+
+.. note::
+
+ Truth-value testing of an array invokes
+ :meth:`ndarray.__nonzero__`, which raises an error if the number of
+ elements in the array is larger than 1, because the truth value
+ of such arrays is ambiguous. Use :meth:`.any() <ndarray.any>` and
+ :meth:`.all() <ndarray.all>` instead to be clear about what is meant in
+ such cases. (If the number of elements is 0, the array evaluates to
+ ``False``.)
+
+
+Unary operations:
+
+.. autosummary::
+ :toctree: generated/
+
+ ndarray.__neg__
+ ndarray.__pos__
+ ndarray.__abs__
+ ndarray.__invert__
+
+Arithmetic:
+
+..
autosummary:: + :toctree: generated/ + + ndarray.__add__ + ndarray.__sub__ + ndarray.__mul__ + ndarray.__div__ + ndarray.__truediv__ + ndarray.__floordiv__ + ndarray.__mod__ + ndarray.__divmod__ + ndarray.__pow__ + ndarray.__lshift__ + ndarray.__rshift__ + ndarray.__and__ + ndarray.__or__ + ndarray.__xor__ + +.. note:: + + - Any third argument to :func:`pow()` is silently ignored, + as the underlying :func:`ufunc <power>` only takes two arguments. + + - The three division operators are all defined; :obj:`div` is active + by default, :obj:`truediv` is active when + :obj:`__future__` division is in effect. + + - Because :class:`ndarray` is a built-in type (written in C), the + ``__r{op}__`` special methods are not directly defined. + + - The functions called to implement many arithmetic special methods + for arrays can be modified using :func:`set_numeric_ops`. + +Arithmetic, in-place: + +.. autosummary:: + :toctree: generated/ + + ndarray.__iadd__ + ndarray.__isub__ + ndarray.__imul__ + ndarray.__idiv__ + ndarray.__itruediv__ + ndarray.__ifloordiv__ + ndarray.__imod__ + ndarray.__ipow__ + ndarray.__ilshift__ + ndarray.__irshift__ + ndarray.__iand__ + ndarray.__ior__ + ndarray.__ixor__ + +.. warning:: + + In place operations will perform the calculation using the + precision decided by the data type of the two operands, but will + silently downcast the result (if necessary) so it can fit back into + the array. Therefore, for mixed precision calculations, ``A {op}= + B`` can be different than ``A = A {op} B``. For example, suppose + ``a = ones((3,3))``. Then, ``a += 3j`` is different than ``a = a + + 3j``: While they both perform the same computation, ``a += 3`` + casts the result to fit back in ``a``, whereas ``a = a + 3j`` + re-binds the name ``a`` to the result. + + +Special methods +=============== + +For standard library functions: + +.. 
autosummary:: + :toctree: generated/ + + ndarray.__copy__ + ndarray.__deepcopy__ + ndarray.__reduce__ + ndarray.__setstate__ + +Basic customization: + +.. autosummary:: + :toctree: generated/ + + ndarray.__new__ + ndarray.__array__ + ndarray.__array_wrap__ + +Container customization: (see :ref:`Indexing <arrays.indexing>`) + +.. autosummary:: + :toctree: generated/ + + ndarray.__len__ + ndarray.__getitem__ + ndarray.__setitem__ + ndarray.__getslice__ + ndarray.__setslice__ + ndarray.__contains__ + +Conversion; the operations :func:`complex()`, :func:`int()`, +:func:`long()`, :func:`float()`, :func:`oct()`, and +:func:`hex()`. They work only on arrays that have one element in them +and return the appropriate scalar. + +.. autosummary:: + :toctree: generated/ + + ndarray.__int__ + ndarray.__long__ + ndarray.__float__ + ndarray.__oct__ + ndarray.__hex__ + +String representations: + +.. autosummary:: + :toctree: generated/ + + ndarray.__str__ + ndarray.__repr__ diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst new file mode 100644 index 000000000..b6d28fe2c --- /dev/null +++ b/doc/source/reference/arrays.rst @@ -0,0 +1,46 @@ +.. _arrays: + +************* +Array objects +************* + +.. currentmodule:: numpy + +NumPy provides an N-dimensional array type, the :ref:`ndarray +<arrays.ndarray>`, which describes a collection of "items" of the same +type. The items can be :ref:`indexed <arrays.indexing>` using for +example N integers. + +All ndarrays are :term:`homogenous`: every item takes up the same size +block of memory, and all blocks are interpreted in exactly the same +way. How each item in the array is to be interpreted is specified by a +separate :ref:`data-type object <arrays.dtypes>`, one of which is associated +with every array. In addition to basic types (integers, floats, +*etc.*), the data type objects can also represent data structures. 
+
+An item extracted from an array, *e.g.*, by indexing, is represented
+by a Python object whose type is one of the :ref:`array scalar types
+<arrays.scalars>` built in Numpy. The array scalars allow easy manipulation
+of even more complicated arrangements of data.
+
+.. figure:: figures/threefundamental.png
+
+ **Figure**
+ Conceptual diagram showing the relationship between the three
+ fundamental objects used to describe the data in an array: 1) the
+ ndarray itself, 2) the data-type object that describes the layout
+ of a single fixed-size element of the array, 3) the array-scalar
+ Python object that is returned when a single element of the array
+ is accessed.
+
+
+
+.. toctree::
+ :maxdepth: 2
+
+ arrays.ndarray
+ arrays.scalars
+ arrays.dtypes
+ arrays.indexing
+ arrays.classes
+ arrays.interface
diff --git a/doc/source/reference/arrays.scalars.rst b/doc/source/reference/arrays.scalars.rst
new file mode 100644
index 000000000..70c1d07c9
--- /dev/null
+++ b/doc/source/reference/arrays.scalars.rst
@@ -0,0 +1,288 @@
+.. _arrays.scalars:
+
+*******
+Scalars
+*******
+
+.. currentmodule:: numpy
+
+Python defines only one type of a particular data class (there is only
+one integer type, one floating-point type, etc.). This can be
+convenient in applications that don't need to be concerned with all
+the ways data can be represented in a computer. For scientific
+computing, however, more control is often needed.
+
+In NumPy, there are 21 new fundamental Python types to describe
+different types of scalars. These type descriptors are mostly based on
+the types available in the C language that CPython is written in, with
+several additional types compatible with Python's types.
+
+Array scalars have the same attributes and methods as :class:`ndarrays
+<ndarray>`. [#]_ This allows one to treat items of an array partly on
+the same footing as arrays, smoothing out rough edges that result when
+mixing scalar and array operations.
+ +Array scalars live in a hierarchy (see the Figure below) of data +types. They can be detected using the hierarchy: For example, +``isinstance(val, np.generic)`` will return :const:`True` if *val* is +an array scalar object. Alternatively, what kind of array scalar is +present can be determined using other members of the data type +hierarchy. Thus, for example ``isinstance(val, np.complexfloating)`` +will return :const:`True` if *val* is a complex valued type, while +:const:`isinstance(val, np.flexible)` will return true if *val* is one +of the flexible itemsize array types (:class:`string`, +:class:`unicode`, :class:`void`). + +.. figure:: figures/dtype-hierarchy.png + + **Figure:** Hierarchy of type objects representing the array data + types. Not shown are the two integer types :class:`intp` and + :class:`uintp` which just point to the integer type that holds a + pointer for the platform. All the number types can be obtained + using bit-width names as well. + +.. [#] However, array scalars are immutable, so that none of the array + scalar attributes are settable. + +.. _arrays.scalars.character-codes: + +.. _arrays.scalars.built-in: + +Built-in scalar types +===================== + +The built-in scalar types are shown below. Along with their (mostly) +C-derived names, the integer, float, and complex data-types are also +available using a bit-width convention so that an array of the right +size can always be ensured (e.g. :class:`int8`, :class:`float64`, +:class:`complex128`). Two aliases (:class:`intp` and :class:`uintp`) +pointing to the integer type that is sufficiently large to hold a C pointer +are also provided. The C-like names are associated with character codes, +which are shown in the table. Use of the character codes, however, +is discouraged. 
+
+Five of the scalar types are essentially equivalent to fundamental
+Python types and therefore inherit from them as well as from the
+generic array scalar type:
+
+==================== ====================
+Array scalar type    Related Python type
+==================== ====================
+:class:`int_`        :class:`IntType`
+:class:`float_`      :class:`FloatType`
+:class:`complex_`    :class:`ComplexType`
+:class:`str_`        :class:`StringType`
+:class:`unicode_`    :class:`UnicodeType`
+==================== ====================
+
+The :class:`bool_` data type is very similar to the Python
+:class:`BooleanType` but does not inherit from it because Python's
+:class:`BooleanType` does not allow itself to be inherited from, and
+on the C-level the size of the actual bool data is not the same as a
+Python Boolean scalar.
+
+.. warning::
+
+   The :class:`bool_` type is not a subclass of the :class:`int_` type
+   (the :class:`bool_` is not even a number type). This is different
+   than Python's default implementation of :class:`bool` as a
+   sub-class of int.
+
+
+.. tip:: The default data type in Numpy is :class:`float_`.
+
+In the tables below, ``platform?`` means that the type may not be
+available on all platforms. Compatibility with different C or Python
+types is indicated: two types are compatible if their data is of the
+same size and interpreted in the same way.
+
+Booleans:
+
+=================== ============================= ===============
+Type                Remarks                       Character code
+=================== ============================= ===============
+:class:`bool_`      compatible: Python bool       ``'?'``
+:class:`bool8`      8 bits
+=================== ============================= ===============
+
+Integers:
+
+=================== ============================= ===============
+:class:`byte`       compatible: C char            ``'b'``
+:class:`short`      compatible: C short           ``'h'``
+:class:`intc`       compatible: C int             ``'i'``
+:class:`int_`       compatible: Python int        ``'l'``
+:class:`longlong`   compatible: C long long       ``'q'``
+:class:`intp`       large enough to fit a pointer ``'p'``
+:class:`int8`       8 bits
+:class:`int16`      16 bits
+:class:`int32`      32 bits
+:class:`int64`      64 bits
+=================== ============================= ===============
+
+Unsigned integers:
+
+=================== ============================= ===============
+:class:`ubyte`      compatible: C unsigned char   ``'B'``
+:class:`ushort`     compatible: C unsigned short  ``'H'``
+:class:`uintc`      compatible: C unsigned int    ``'I'``
+:class:`uint`       compatible: Python int        ``'L'``
+:class:`ulonglong`  compatible: C long long       ``'Q'``
+:class:`uintp`      large enough to fit a pointer ``'P'``
+:class:`uint8`      8 bits
+:class:`uint16`     16 bits
+:class:`uint32`     32 bits
+:class:`uint64`     64 bits
+=================== ============================= ===============
+
+Floating-point numbers:
+
+=================== ============================= ===============
+:class:`single`     compatible: C float           ``'f'``
+:class:`double`     compatible: C double
+:class:`float_`     compatible: Python float      ``'d'``
+:class:`longfloat`  compatible: C long double     ``'g'``
+:class:`float32`    32 bits
+:class:`float64`    64 bits
+:class:`float96`    96 bits, platform?
+:class:`float128`   128 bits, platform?
+=================== ============================= =============== + +Complex floating-point numbers: + +=================== ============================= =============== +:class:`csingle` ``'F'`` +:class:`complex_` compatible: Python complex ``'D'`` +:class:`clongfloat` ``'G'`` +:class:`complex64` two 32-bit floats +:class:`complex128` two 64-bit floats +:class:`complex192` two 96-bit floats, + platform? +:class:`complex256` two 128-bit floats, + platform? +=================== ============================= =============== + +Any Python object: + +=================== ============================= =============== +:class:`object_` any Python object ``'O'`` +=================== ============================= =============== + +.. note:: + + The data actually stored in :term:`object arrays <object array>` + (*i.e.* arrays having dtype :class:`object_`) are references to + Python objects, not the objects themselves. Hence, object arrays + behave more like usual Python :class:`lists <list>`, in the sense + that their contents need not be of the same Python type. + + The object type is also special because an array containing + :class:`object_` items does not return an :class:`object_` object + on item access, but instead returns the actual object that + the array item refers to. + +The following data types are :term:`flexible`. They have no predefined +size: the data they describe can be of different length in different +arrays. (In the character codes ``#`` is an integer denoting how many +elements the data type consists of.) + +=================== ============================= ======== +:class:`str_` compatible: Python str ``'S#'`` +:class:`unicode_` compatible: Python unicode ``'U#'`` +:class:`void` ``'V#'`` +=================== ============================= ======== + + +.. warning:: + + Numeric Compatibility: If you used old typecode characters in your + Numeric code (which was never recommended), you will need to change + some of them to the new characters. 
In particular, the needed + changes are ``c -> S1``, ``b -> B``, ``1 -> b``, ``s -> h``, ``w -> + H``, and ``u -> I``. These changes make the type character + convention more consistent with other Python modules such as the + :mod:`struct` module. + + +.. note:: XXX: what to put in the type docstrings, and where to put them? + +Attributes +========== + +The array scalar objects have an :obj:`array priority +<__array_priority__>` of :cdata:`NPY_SCALAR_PRIORITY` +(-1,000,000.0). They also do not (yet) have a :attr:`ctypes <ndarray.ctypes>` +attribute. Otherwise, they share the same attributes as arrays: + +.. autosummary:: + :toctree: generated/ + + generic.flags + generic.shape + generic.strides + generic.ndim + generic.data + generic.size + generic.itemsize + generic.base + generic.dtype + generic.real + generic.imag + generic.flat + generic.T + generic.__array_interface__ + generic.__array_struct__ + generic.__array_priority__ + generic.__array_wrap__ + +.. note:: XXX: import the documentation into the docstrings? + +Indexing +======== +.. seealso:: :ref:`arrays.indexing`, :ref:`arrays.dtypes` + +Array scalars can be indexed like 0-dimensional arrays: if *x* is an +array scalar, + +- ``x[()]`` returns a 0-dimensional :class:`ndarray` +- ``x['field-name']`` returns the array scalar in the field *field-name*. + (*x* can have fields, for example, when it corresponds to a record data type.) + +Methods +======= + +Array scalars have exactly the same methods as arrays. The default +behavior of these methods is to internally convert the scalar to an +equivalent 0-dimensional array and to call the corresponding array +method. In addition, math operations on array scalars are defined so +that the same hardware flags are set and used to interpret the results +as for :ref:`ufunc <ufuncs>`, so that the error state used for ufuncs +also carries over to the math on array scalars. + +The exceptions to the above rules are given below: + +.. 
autosummary:: + :toctree: generated/ + + generic + generic.__array__ + generic.__array_wrap__ + generic.__squeeze__ + generic.byteswap + generic.__reduce__ + generic.__setstate__ + generic.setflags + +.. note:: XXX: import the documentation into the docstrings? + +Defining new types +================== + +There are two ways to effectively define a new array scalar type +(apart from composing record :ref:`dtypes <arrays.dtypes>` from the built-in +scalar types): One way is to simply subclass the :class:`ndarray` and +overwrite the methods of interest. This will work to a degree, but +internally certain behaviors are fixed by the data type of the array. +To fully customize the data type of an array you need to define a new +data-type, and register it with NumPy. Such new types can only be +defined in C, using the :ref:`Numpy C-API <c-api>`. diff --git a/doc/source/reference/c-api.array.rst b/doc/source/reference/c-api.array.rst new file mode 100644 index 000000000..56950a8d9 --- /dev/null +++ b/doc/source/reference/c-api.array.rst @@ -0,0 +1,2635 @@ +Array API +========= + +.. sectionauthor:: Travis E. Oliphant + +| The test of a first-rate intelligence is the ability to hold two +| opposed ideas in the mind at the same time, and still retain the +| ability to function. +| --- *F. Scott Fitzgerald* + +| For a successful technology, reality must take precedence over public +| relations, for Nature cannot be fooled. +| --- *Richard P. Feynman* + +.. index:: + pair: ndarray; C-API + pair: C-API; array + + +Array structure and data access +------------------------------- + +These macros all access the :ctype:`PyArrayObject` structure members. The input +argument, obj, can be any :ctype:`PyObject *` that is directly interpretable +as a :ctype:`PyArrayObject *` (any instance of the :cdata:`PyArray_Type` and its +sub-types). + +.. cfunction:: void *PyArray_DATA(PyObject *obj) + +.. 
cfunction:: char *PyArray_BYTES(PyObject *obj) + + These two macros are similar and obtain the pointer to the + data-buffer for the array. The first macro can (and should be) + assigned to a particular pointer where the second is for generic + processing. If you have not guaranteed a contiguous and/or aligned + array then be sure you understand how to access the data in the + array to avoid memory and/or alignment problems. + +.. cfunction:: npy_intp *PyArray_DIMS(PyObject *arr) + +.. cfunction:: npy_intp *PyArray_STRIDES(PyObject* arr) + +.. cfunction:: npy_intp PyArray_DIM(PyObject* arr, int n) + + Return the shape in the *n* :math:`^{\textrm{th}}` dimension. + +.. cfunction:: npy_intp PyArray_STRIDE(PyObject* arr, int n) + + Return the stride in the *n* :math:`^{\textrm{th}}` dimension. + +.. cfunction:: PyObject *PyArray_BASE(PyObject* arr) + +.. cfunction:: PyArray_Descr *PyArray_DESCR(PyObject* arr) + +.. cfunction:: int PyArray_FLAGS(PyObject* arr) + +.. cfunction:: int PyArray_ITEMSIZE(PyObject* arr) + + Return the itemsize for the elements of this array. + +.. cfunction:: int PyArray_TYPE(PyObject* arr) + + Return the (builtin) typenumber for the elements of this array. + +.. cfunction:: PyObject *PyArray_GETITEM(PyObject* arr, void* itemptr) + + Get a Python object from the ndarray, *arr*, at the location + pointed to by itemptr. Return ``NULL`` on failure. + +.. cfunction:: int PyArray_SETITEM(PyObject* arr, void* itemptr, PyObject* obj) + + Convert obj and place it in the ndarray, *arr*, at the place + pointed to by itemptr. Return -1 if an error occurs or 0 on + success. + +.. cfunction:: npy_intp PyArray_SIZE(PyObject* arr) + + Returns the total size (in number of elements) of the array. + +.. cfunction:: npy_intp PyArray_Size(PyObject* obj) + + Returns 0 if *obj* is not a sub-class of bigndarray. Otherwise, + returns the total number of elements in the array. Safer version + of :cfunc:`PyArray_SIZE` (*obj*). + +.. 
cfunction:: npy_intp PyArray_NBYTES(PyObject* arr) + + Returns the total number of bytes consumed by the array. + + +Data access +^^^^^^^^^^^ + +These functions and macros provide easy access to elements of the +ndarray from C. These work for all arrays. You may need to take care +when accessing the data in the array, however, if it is not in machine +byte-order, misaligned, or not writeable. In other words, be sure to +respect the state of the flags unless you know what you are doing, or +have previously guaranteed an array that is writeable, aligned, and in +machine byte-order using :cfunc:`PyArray_FromAny`. If you wish to handle all +types of arrays, the copyswap function for each type is useful for +handling misbehaved arrays. Some platforms (e.g. Solaris) do not like +misaligned data and will crash if you de-reference a misaligned +pointer. Other platforms (e.g. x86 Linux) will just work more slowly +with misaligned data. + +.. cfunction:: void* PyArray_GetPtr(PyArrayObject* aobj, npy_intp* ind) + + Return a pointer to the data of the ndarray, *aobj*, at the + N-dimensional index given by the c-array, *ind*, (which must be + at least *aobj* ->nd in size). You may want to typecast the + returned pointer to the data type of the ndarray. + +.. cfunction:: void* PyArray_GETPTR1(PyObject* obj, <npy_intp> i) + +.. cfunction:: void* PyArray_GETPTR2(PyObject* obj, <npy_intp> i, <npy_intp> j) + +.. cfunction:: void* PyArray_GETPTR3(PyObject* obj, <npy_intp> i, <npy_intp> j, <npy_intp> k) + +.. cfunction:: void* PyArray_GETPTR4(PyObject* obj, <npy_intp> i, <npy_intp> j, <npy_intp> k, <npy_intp> l) + + Quick, inline access to the element at the given coordinates in + the ndarray, *obj*, which must have respectively 1, 2, 3, or 4 + dimensions (this is not checked). The corresponding *i*, *j*, + *k*, and *l* coordinates can be any integer but will be + interpreted as ``npy_intp``. You may want to typecast the + returned pointer to the data type of the ndarray. 
+ + +Creating arrays +--------------- + + +From scratch +^^^^^^^^^^^^ + +.. cfunction:: PyObject* PyArray_NewFromDescr(PyTypeObject* subtype, PyArray_Descr* descr, int nd, npy_intp* dims, npy_intp* strides, void* data, int flags, PyObject* obj) + + This is the main array creation function. Most new arrays are + created with this flexible function. The returned object is an + object of Python-type *subtype*, which must be a subtype of + :cdata:`PyArray_Type`. The array has *nd* dimensions, described by + *dims*. The data-type descriptor of the new array is *descr*. If + *subtype* is not :cdata:`&PyArray_Type` (*e.g.* a Python subclass of + the ndarray), then *obj* is the object to pass to the + :obj:`__array_finalize__` method of the subclass. If *data* is + ``NULL``, then new memory will be allocated and *flags* can be + non-zero to indicate a Fortran-style contiguous array. If *data* + is not ``NULL``, then it is assumed to point to the memory to be + used for the array and the *flags* argument is used as the new + flags for the array (except the state of :cdata:`NPY_OWNDATA` and + :cdata:`UPDATEIFCOPY` flags of the new array will be reset). In + addition, if *data* is non-NULL, then *strides* can also be + provided. If *strides* is ``NULL``, then the array strides are + computed as C-style contiguous (default) or Fortran-style + contiguous (*flags* is nonzero for *data* = ``NULL`` or *flags* & + :cdata:`NPY_F_CONTIGUOUS` is nonzero non-NULL *data*). Any provided + *dims* and *strides* are copied into newly allocated dimension and + strides arrays for the new array object. + +.. cfunction:: PyObject* PyArray_New(PyTypeObject* subtype, int nd, npy_intp* dims, int type_num, npy_intp* strides, void* data, int itemsize, int flags, PyObject* obj) + + This is similar to :cfunc:`PyArray_DescrNew` (...) except you + specify the data-type descriptor with *type_num* and *itemsize*, + where *type_num* corresponds to a builtin (or user-defined) + type. 
If the type always has the same number of bytes, then
+   itemsize is ignored. Otherwise, itemsize specifies the particular
+   size of this array.
+
+
+
+.. warning::
+
+   If data is passed to :cfunc:`PyArray_NewFromDescr` or :cfunc:`PyArray_New`,
+   this memory must not be deallocated until the new array is
+   deleted. If this data came from another Python object, this can
+   be accomplished using :cfunc:`Py_INCREF` on that object and setting the
+   base member of the new array to point to that object. If strides
+   are passed in they must be consistent with the dimensions, the
+   itemsize, and the data of the array.
+
+.. cfunction:: PyObject* PyArray_SimpleNew(int nd, npy_intp* dims, int typenum)
+
+   Create a new uninitialized array of type, *typenum*, whose size in
+   each of *nd* dimensions is given by the integer array, *dims*.
+   This function cannot be used to create a flexible-type array (no
+   itemsize given).
+
+.. cfunction:: PyObject* PyArray_SimpleNewFromData(int nd, npy_intp* dims, int typenum, void* data)
+
+   Create an array wrapper around *data* pointed to by the given
+   pointer. The array flags will have a default that the data area is
+   well-behaved and C-style contiguous. The shape of the array is
+   given by the *dims* c-array of length *nd*. The data-type of the
+   array is indicated by *typenum*.
+
+.. cfunction:: PyObject* PyArray_SimpleNewFromDescr(int nd, npy_intp* dims, PyArray_Descr* descr)
+
+   Create a new array with the provided data-type descriptor, *descr*
+   , of the shape determined by *nd* and *dims*.
+
+.. cfunction:: PyArray_FILLWBYTE(PyObject* obj, int val)
+
+   Fill the array pointed to by *obj* ---which must be a (subclass
+   of) bigndarray---with the contents of *val* (evaluated as a byte).
+
+.. cfunction:: PyObject* PyArray_Zeros(int nd, npy_intp* dims, PyArray_Descr* dtype, int fortran)
+
+   Construct a new *nd* -dimensional array with shape given by *dims*
+   and data type given by *dtype*.
If *fortran* is non-zero, then a + Fortran-order array is created, otherwise a C-order array is + created. Fill the memory with zeros (or the 0 object if *dtype* + corresponds to :ctype:`PyArray_OBJECT` ). + +.. cfunction:: PyObject* PyArray_ZEROS(int nd, npy_intp* dims, int type_num, int fortran) + + Macro form of :cfunc:`PyArray_Zeros` which takes a type-number instead + of a data-type object. + +.. cfunction:: PyObject* PyArray_Empty(int nd, npy_intp* dims, PyArray_Descr* dtype, int fortran) + + Construct a new *nd* -dimensional array with shape given by *dims* + and data type given by *dtype*. If *fortran* is non-zero, then a + Fortran-order array is created, otherwise a C-order array is + created. The array is uninitialized unless the data type + corresponds to :ctype:`PyArray_OBJECT` in which case the array is + filled with :cdata:`Py_None`. + +.. cfunction:: PyObject* PyArray_EMPTY(int nd, npy_intp* dims, int typenum, int fortran) + + Macro form of :cfunc:`PyArray_Empty` which takes a type-number, + *typenum*, instead of a data-type object. + +.. cfunction:: PyObject* PyArray_Arange(double start, double stop, double step, int typenum) + + Construct a new 1-dimensional array of data-type, *typenum*, that + ranges from *start* to *stop* (exclusive) in increments of *step* + . Equivalent to **arange** (*start*, *stop*, *step*, dtype). + +.. cfunction:: PyObject* PyArray_ArangeObj(PyObject* start, PyObject* stop, PyObject* step, PyArray_Descr* descr) + + Construct a new 1-dimensional array of data-type determined by + ``descr``, that ranges from ``start`` to ``stop`` (exclusive) in + increments of ``step``. Equivalent to arange( ``start``, + ``stop``, ``step``, ``typenum`` ). + + +From other objects +^^^^^^^^^^^^^^^^^^ + +.. 
cfunction:: PyObject* PyArray_FromAny(PyObject* op, PyArray_Descr* dtype, int min_depth, int max_depth, int requirements, PyObject* context) + + This is the main function used to obtain an array from any nested + sequence, or object that exposes the array interface, ``op``. The + parameters allow specification of the required *type*, the + minimum (*min_depth*) and maximum (*max_depth*) number of + dimensions acceptable, and other *requirements* for the array. The + *dtype* argument needs to be a :ctype:`PyArray_Descr` structure + indicating the desired data-type (including required + byteorder). The *dtype* argument may be NULL, indicating that any + data-type (and byteorder) is acceptable. If you want to use + ``NULL`` for the *dtype* and ensure the array is notswapped then + use :cfunc:`PyArray_CheckFromAny`. A value of 0 for either of the + depth parameters causes the parameter to be ignored. Any of the + following array flags can be added (*e.g.* using \|) to get the + *requirements* argument. If your code can handle general (*e.g.* + strided, byte-swapped, or unaligned arrays) then *requirements* + may be 0. Also, if *op* is not already an array (or does not + expose the array interface), then a new array will be created (and + filled from *op* using the sequence protocol). The new array will + have :cdata:`NPY_DEFAULT` as its flags member. The *context* argument + is passed to the :obj:`__array__` method of *op* and is only used if + the array is constructed that way. + + .. cvar:: NPY_C_CONTIGUOUS + + Make sure the returned array is C-style contiguous + + .. cvar:: NPY_F_CONTIGUOUS + + Make sure the returned array is Fortran-style contiguous. + + .. cvar:: NPY_ALIGNED + + Make sure the returned array is aligned on proper boundaries for its + data type. An aligned array has the data pointer and every strides + factor as a multiple of the alignment factor for the data-type- + descriptor. + + .. 
cvar:: NPY_WRITEABLE + + Make sure the returned array can be written to. + + .. cvar:: NPY_ENSURECOPY + + Make sure a copy is made of *op*. If this flag is not + present, data is not copied if it can be avoided. + + .. cvar:: NPY_ENSUREARRAY + + Make sure the result is a base-class ndarray or bigndarray. By + default, if *op* is an instance of a subclass of the + bigndarray, an instance of that same subclass is returned. If + this flag is set, an ndarray object will be returned instead. + + .. cvar:: NPY_FORCECAST + + Force a cast to the output type even if it cannot be done + safely. Without this flag, a data cast will occur only if it + can be done safely, otherwise an error is reaised. + + .. cvar:: NPY_UPDATEIFCOPY + + If *op* is already an array, but does not satisfy the + requirements, then a copy is made (which will satisfy the + requirements). If this flag is present and a copy (of an + object that is already an array) must be made, then the + corresponding :cdata:`NPY_UPDATEIFCOPY` flag is set in the returned + copy and *op* is made to be read-only. When the returned copy + is deleted (presumably after your calculations are complete), + its contents will be copied back into *op* and the *op* array + will be made writeable again. If *op* is not writeable to + begin with, then an error is raised. If *op* is not already an + array, then this flag has no effect. + + .. cvar:: NPY_BEHAVED + + :cdata:`NPY_ALIGNED` \| :cdata:`NPY_WRITEABLE` + + .. cvar:: NPY_CARRAY + + :cdata:`NPY_C_CONTIGUOUS` \| :cdata:`NPY_BEHAVED` + + .. cvar:: NPY_CARRAY_RO + + :cdata:`NPY_C_CONTIGUOUS` \| :cdata:`NPY_ALIGNED` + + .. cvar:: NPY_FARRAY + + :cdata:`NPY_F_CONTIGUOUS` \| :cdata:`NPY_BEHAVED` + + .. cvar:: NPY_FARRAY_RO + + :cdata:`NPY_F_CONTIGUOUS` \| :cdata:`NPY_ALIGNED` + + .. cvar:: NPY_DEFAULT + + :cdata:`NPY_CARRAY` + + .. cvar:: NPY_IN_ARRAY + + :cdata:`NPY_CONTIGUOUS` \| :cdata:`NPY_ALIGNED` + + .. 
cvar:: NPY_IN_FARRAY + + :cdata:`NPY_F_CONTIGUOUS` \| :cdata:`NPY_ALIGNED` + + .. cvar:: NPY_INOUT_ARRAY + + :cdata:`NPY_C_CONTIGUOUS` \| :cdata:`NPY_WRITEABLE` \| + :cdata:`NPY_ALIGNED` + + .. cvar:: NPY_INOUT_FARRAY + + :cdata:`NPY_F_CONTIGUOUS` \| :cdata:`NPY_WRITEABLE` \| + :cdata:`NPY_ALIGNED` + + .. cvar:: NPY_OUT_ARRAY + + :cdata:`NPY_C_CONTIGUOUS` \| :cdata:`NPY_WRITEABLE` \| + :cdata:`NPY_ALIGNED` \| :cdata:`NPY_UPDATEIFCOPY` + + .. cvar:: NPY_OUT_FARRAY + + :cdata:`NPY_F_CONTIGUOUS` \| :cdata:`NPY_WRITEABLE` \| + :cdata:`NPY_ALIGNED` \| :cdata:`UPDATEIFCOPY` + + +.. cfunction:: PyObject* PyArray_CheckFromAny(PyObject* op, PyArray_Descr* dtype, int min_depth, int max_depth, int requirements, PyObject* context) + + Nearly identical to :cfunc:`PyArray_FromAny` (...) except + *requirements* can contain :cdata:`NPY_NOTSWAPPED` (over-riding the + specification in *dtype*) and :cdata:`NPY_ELEMENTSTRIDES` which + indicates that the array should be aligned in the sense that the + strides are multiples of the element size. + +.. cvar:: NPY_NOTSWAPPED + + Make sure the returned array has a data-type descriptor that is in + machine byte-order, over-riding any specification in the *dtype* + argument. Normally, the byte-order requirement is determined by + the *dtype* argument. If this flag is set and the dtype argument + does not indicate a machine byte-order descriptor (or is NULL and + the object is already an array with a data-type descriptor that is + not in machine byte- order), then a new data-type descriptor is + created and used with its byte-order field set to native. + +.. cvar:: NPY_BEHAVED_NS + + :cdata:`NPY_ALIGNED` \| :cdata:`NPY_WRITEABLE` \| :cdata:`NPY_NOTSWAPPED` + +.. cvar:: NPY_ELEMENTSTRIDES + + Make sure the returned array has strides that are multiples of the + element size. + +.. 
cfunction:: PyObject* PyArray_FromArray(PyArrayObject* op, PyArray_Descr* newtype, int requirements) + + Special case of :cfunc:`PyArray_FromAny` for when *op* is already an + array but it needs to be of a specific *newtype* (including + byte-order) or has certain *requirements*. + +.. cfunction:: PyObject* PyArray_FromStructInterface(PyObject* op) + + Returns an ndarray object from a Python object that exposes the + :obj:`__array_struct__`` method and follows the array interface + protocol. If the object does not contain this method then a + borrowed reference to :cdata:`Py_NotImplemented` is returned. + +.. cfunction:: PyObject* PyArray_FromInterface(PyObject* op) + + Returns an ndarray object from a Python object that exposes the + :obj:`__array_shape__` and :obj:`__array_typestr__` + methods following + the array interface protocol. If the object does not contain one + of these method then a borrowed reference to :cdata:`Py_NotImplemented` + is returned. + +.. cfunction:: PyObject* PyArray_FromArrayAttr(PyObject* op, PyArray_Descr* dtype, PyObject* context) + + Return an ndarray object from a Python object that exposes the + :obj:`__array__` method. The :obj:`__array__` method can take 0, 1, or 2 + arguments ([dtype, context]) where *context* is used to pass + information about where the :obj:`__array__` method is being called + from (currently only used in ufuncs). + +.. cfunction:: PyObject* PyArray_ContiguousFromAny(PyObject* op, int typenum, int min_depth, int max_depth) + + This function returns a (C-style) contiguous and behaved function + array from any nested sequence or array interface exporting + object, *op*, of (non-flexible) type given by the enumerated + *typenum*, of minimum depth *min_depth*, and of maximum depth + *max_depth*. Equivalent to a call to :cfunc:`PyArray_FromAny` with + requirements set to :cdata:`NPY_DEFAULT` and the type_num member of the + type argument set to *typenum*. + +.. 
cfunction:: PyObject *PyArray_FromObject(PyObject *op, int typenum, int min_depth, int max_depth) + + Return an aligned and in native-byteorder array from any nested + sequence or array-interface exporting object, op, of a type given by + the enumerated typenum. The minimum number of dimensions the array can + have is given by min_depth while the maximum is max_depth. This is + equivalent to a call to :cfunc:`PyArray_FromAny` with requirements set to + BEHAVED. + +.. cfunction:: PyObject* PyArray_EnsureArray(PyObject* op) + + This function **steals a reference** to ``op`` and makes sure that + ``op`` is a base-class ndarray. It special cases array scalars, + but otherwise calls :cfunc:`PyArray_FromAny` ( ``op``, NULL, 0, 0, + :cdata:`NPY_ENSUREARRAY`). + +.. cfunction:: PyObject* PyArray_FromString(char* string, npy_intp slen, PyArray_Descr* dtype, npy_intp num, char* sep) + + Construct a one-dimensional ndarray of a single type from a binary + or (ASCII) text ``string`` of length ``slen``. The data-type of + the array to-be-created is given by ``dtype``. If num is -1, then + **copy** the entire string and return an appropriately sized + array, otherwise, ``num`` is the number of items to **copy** from + the string. If ``sep`` is NULL (or ""), then interpret the string + as bytes of binary data, otherwise convert the sub-strings + separated by ``sep`` to items of data-type ``dtype``. Some + data-types may not be readable in text mode and an error will be + raised if that occurs. All errors return NULL. + +.. cfunction:: PyObject* PyArray_FromFile(FILE* fp, PyArray_Descr* dtype, npy_intp num, char* sep) + + Construct a one-dimensional ndarray of a single type from a binary + or text file. The open file pointer is ``fp``, the data-type of + the array to be created is given by ``dtype``. This must match + the data in the file. 
If ``num`` is -1, then read until the end of + the file and return an appropriately sized array, otherwise, + ``num`` is the number of items to read. If ``sep`` is NULL (or + ""), then read from the file in binary mode, otherwise read from + the file in text mode with ``sep`` providing the item + separator. Some array types cannot be read in text mode in which + case an error is raised. + +.. cfunction:: PyObject* PyArray_FromBuffer(PyObject* buf, PyArray_Descr* dtype, npy_intp count, npy_intp offset) + + Construct a one-dimensional ndarray of a single type from an + object, ``buf``, that exports the (single-segment) buffer protocol + (or has an attribute __buffer\__ that returns an object that + exports the buffer protocol). A writeable buffer will be tried + first followed by a read- only buffer. The :cdata:`NPY_WRITEABLE` + flag of the returned array will reflect which one was + successful. The data is assumed to start at ``offset`` bytes from + the start of the memory location for the object. The type of the + data in the buffer will be interpreted depending on the data- type + descriptor, ``dtype.`` If ``count`` is negative then it will be + determined from the size of the buffer and the requested itemsize, + otherwise, ``count`` represents how many elements should be + converted from the buffer. + +.. cfunction:: int PyArray_CopyInto(PyArrayObject* dest, PyArrayObject* src) + + Copy from the source array, ``src``, into the destination array, + ``dest``, performing a data-type conversion if necessary. If an + error occurs return -1 (otherwise 0). The shape of ``src`` must be + broadcastable to the shape of ``dest``. The data areas of dest + and src must not overlap. + +.. cfunction:: int PyArray_MoveInto(PyArrayObject* dest, PyArrayObject* src) + + Move data from the source array, ``src``, into the destination + array, ``dest``, performing a data-type conversion if + necessary. If an error occurs return -1 (otherwise 0). 
The shape + of ``src`` must be broadcastable to the shape of ``dest``. The + data areas of dest and src may overlap. + +.. cfunction:: PyArrayObject* PyArray_GETCONTIGUOUS(PyObject* op) + + If ``op`` is already (C-style) contiguous and well-behaved then + just return a reference, otherwise return a (contiguous and + well-behaved) copy of the array. The parameter op must be a + (sub-class of an) ndarray and no checking for that is done. + +.. cfunction:: PyObject* PyArray_FROM_O(PyObject* obj) + + Convert ``obj`` to an ndarray. The argument can be any nested + sequence or object that exports the array interface. This is a + macro form of :cfunc:`PyArray_FromAny` using ``NULL``, 0, 0, 0 for the + other arguments. Your code must be able to handle any data-type + descriptor and any combination of data-flags to use this macro. + +.. cfunction:: PyObject* PyArray_FROM_OF(PyObject* obj, int requirements) + + Similar to :cfunc:`PyArray_FROM_O` except it can take an argument + of *requirements* indicating properties the resulting array must + have. Available requirements that can be enforced are + :cdata:`NPY_CONTIGUOUS`, :cdata:`NPY_F_CONTIGUOUS`, + :cdata:`NPY_ALIGNED`, :cdata:`NPY_WRITEABLE`, + :cdata:`NPY_NOTSWAPPED`, :cdata:`NPY_ENSURECOPY`, + :cdata:`NPY_UPDATEIFCOPY`, :cdata:`NPY_FORCECAST`, and + :cdata:`NPY_ENSUREARRAY`. Standard combinations of flags can also + be used: + +.. cfunction:: PyObject* PyArray_FROM_OT(PyObject* obj, int typenum) + + Similar to :cfunc:`PyArray_FROM_O` except it can take an argument of + *typenum* specifying the type-number the returned array. + +.. cfunction:: PyObject* PyArray_FROM_OTF(PyObject* obj, int typenum, int requirements) + + Combination of :cfunc:`PyArray_FROM_OF` and :cfunc:`PyArray_FROM_OT` + allowing both a *typenum* and a *flags* argument to be provided.. + +.. 
cfunction:: PyObject* PyArray_FROMANY(PyObject* obj, int typenum, int min, int max, int requirements) + + Similar to :cfunc:`PyArray_FromAny` except the data-type is + specified using a typenumber. :cfunc:`PyArray_DescrFromType` + (*typenum*) is passed directly to :cfunc:`PyArray_FromAny`. This + macro also adds :cdata:`NPY_DEFAULT` to requirements if + :cdata:`NPY_ENSURECOPY` is passed in as requirements. + +.. cfunction:: PyObject *PyArray_CheckAxis(PyObject* obj, int* axis, int requirements) + + Encapsulate the functionality of functions and methods that take + the axis= keyword and work properly with None as the axis + argument. The input array is ``obj``, while ``*axis`` is a + converted integer (so that >=MAXDIMS is the None value), and + ``requirements`` gives the needed properties of ``obj``. The + output is a converted version of the input so that requirements + are met and if needed a flattening has occurred. On output + negative values of ``*axis`` are converted and the new value is + checked to ensure consistency with the shape of ``obj``. + + +Dealing with types +------------------ + + +General check of Python Type +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. cfunction:: PyArray_Check(op) + + Evaluates true if *op* is a Python object whose type is a sub-type + of :cdata:`PyArray_Type`. + +.. cfunction:: PyArray_CheckExact(op) + + Evaluates true if *op* is a Python object with type + :cdata:`PyArray_Type`. + +.. cfunction:: PyArray_HasArrayInterface(op, out) + + If ``op`` implements any part of the array interface, then ``out`` + will contain a new reference to the newly created ndarray using + the interface or ``out`` will contain ``NULL`` if an error during + conversion occurs. Otherwise, out will contain a borrowed + reference to :cdata:`Py_NotImplemented` and no error condition is set. + +.. 
cfunction:: PyArray_HasArrayInterfaceType(op, type, context, out) + + If ``op`` implements any part of the array interface, then ``out`` + will contain a new reference to the newly created ndarray using + the interface or ``out`` will contain ``NULL`` if an error during + conversion occurs. Otherwise, out will contain a borrowed + reference to Py_NotImplemented and no error condition is set. + This version allows setting of the type and context in the part of + the array interface that looks for the :obj:`__array__` attribute. + +.. cfunction:: PyArray_IsZeroDim(op) + + Evaluates true if *op* is an instance of (a subclass of) + :cdata:`PyArray_Type` and has 0 dimensions. + +.. cfunction:: PyArray_IsScalar(op, cls) + + Evaluates true if *op* is an instance of :cdata:`Py{cls}ArrType_Type`. + +.. cfunction:: PyArray_CheckScalar(op) + + Evaluates true if *op* is either an array scalar (an instance of a + sub-type of :cdata:`PyGenericArr_Type` ), or an instance of (a + sub-class of) :cdata:`PyArray_Type` whose dimensionality is 0. + +.. cfunction:: PyArray_IsPythonScalar(op) + + Evaluates true if *op* is a builtin Python "scalar" object (int, + float, complex, str, unicode, long, bool). + +.. cfunction:: PyArray_IsAnyScalar(op) + + Evaluates true if *op* is either a Python scalar or an array + scalar (an instance of a sub- type of :cdata:`PyGenericArr_Type` ). + + +Data-type checking +^^^^^^^^^^^^^^^^^^ + +For the typenum macros, the argument is an integer representing an +enumerated array data type. For the array type checking macros the +argument must be a :ctype:`PyObject *` that can be directly interpreted as a +:ctype:`PyArrayObject *`. + +.. cfunction:: PyTypeNum_ISUNSIGNED(num) + +.. cfunction:: PyDataType_ISUNSIGNED(descr) + +.. cfunction:: PyArray_ISUNSIGNED(obj) + + Type represents an unsigned integer. + +.. cfunction:: PyTypeNum_ISSIGNED(num) + +.. cfunction:: PyDataType_ISSIGNED(descr) + +.. 
cfunction:: PyArray_ISSIGNED(obj) + + Type represents a signed integer. + +.. cfunction:: PyTypeNum_ISINTEGER(num) + +.. cfunction:: PyDataType_ISINTEGER(descr) + +.. cfunction:: PyArray_ISINTEGER(obj) + + Type represents any integer. + +.. cfunction:: PyTypeNum_ISFLOAT(num) + +.. cfunction:: PyDataType_ISFLOAT(descr) + +.. cfunction:: PyArray_ISFLOAT(obj) + + Type represents any floating point number. + +.. cfunction:: PyTypeNum_ISCOMPLEX(num) + +.. cfunction:: PyDataType_ISCOMPLEX(descr) + +.. cfunction:: PyArray_ISCOMPLEX(obj) + + Type represents any complex floating point number. + +.. cfunction:: PyTypeNum_ISNUMBER(num) + +.. cfunction:: PyDataType_ISNUMBER(descr) + +.. cfunction:: PyArray_ISNUMBER(obj) + + Type represents any integer, floating point, or complex floating point + number. + +.. cfunction:: PyTypeNum_ISSTRING(num) + +.. cfunction:: PyDataType_ISSTRING(descr) + +.. cfunction:: PyArray_ISSTRING(obj) + + Type represents a string data type. + +.. cfunction:: PyTypeNum_ISPYTHON(num) + +.. cfunction:: PyDataType_ISPYTHON(descr) + +.. cfunction:: PyArray_ISPYTHON(obj) + + Type represents an enumerated type corresponding to one of the + standard Python scalar (bool, int, float, or complex). + +.. cfunction:: PyTypeNum_ISFLEXIBLE(num) + +.. cfunction:: PyDataType_ISFLEXIBLE(descr) + +.. cfunction:: PyArray_ISFLEXIBLE(obj) + + Type represents one of the flexible array types ( :cdata:`NPY_STRING`, + :cdata:`NPY_UNICODE`, or :cdata:`NPY_VOID` ). + +.. cfunction:: PyTypeNum_ISUSERDEF(num) + +.. cfunction:: PyDataType_ISUSERDEF(descr) + +.. cfunction:: PyArray_ISUSERDEF(obj) + + Type represents a user-defined type. + +.. cfunction:: PyTypeNum_ISEXTENDED(num) + +.. cfunction:: PyDataType_ISEXTENDED(descr) + +.. cfunction:: PyArray_ISEXTENDED(obj) + + Type is either flexible or user-defined. + +.. cfunction:: PyTypeNum_ISOBJECT(num) + +.. cfunction:: PyDataType_ISOBJECT(descr) + +.. cfunction:: PyArray_ISOBJECT(obj) + + Type represents object data type. + +.. 
cfunction:: PyTypeNum_ISBOOL(num) + +.. cfunction:: PyDataType_ISBOOL(descr) + +.. cfunction:: PyArray_ISBOOL(obj) + + Type represents Boolean data type. + +.. cfunction:: PyDataType_HASFIELDS(descr) + +.. cfunction:: PyArray_HASFIELDS(obj) + + Type has fields associated with it. + +.. cfunction:: PyArray_ISNOTSWAPPED(m) + + Evaluates true if the data area of the ndarray *m* is in machine + byte-order according to the array's data-type descriptor. + +.. cfunction:: PyArray_ISBYTESWAPPED(m) + + Evaluates true if the data area of the ndarray *m* is **not** in + machine byte-order according to the array's data-type descriptor. + +.. cfunction:: Bool PyArray_EquivTypes(PyArray_Descr* type1, PyArray_Descr* type2) + + Return :cdata:`NPY_TRUE` if *type1* and *type2* actually represent + equivalent types for this platform (the fortran member of each + type is ignored). For example, on 32-bit platforms, + :cdata:`NPY_LONG` and :cdata:`NPY_INT` are equivalent. Otherwise + return :cdata:`NPY_FALSE`. + +.. cfunction:: Bool PyArray_EquivArrTypes(PyArrayObject* a1, PyArrayObject * a2) + + Return :cdata:`NPY_TRUE` if *a1* and *a2* are arrays with equivalent + types for this platform. + +.. cfunction:: Bool PyArray_EquivTypenums(int typenum1, int typenum2) + + Special case of :cfunc:`PyArray_EquivTypes` (...) that does not accept + flexible data types but may be easier to call. + +.. cfunction:: int PyArray_EquivByteorders({byteorder} b1, {byteorder} b2) + + True if byteorder characters ( :cdata:`NPY_LITTLE`, + :cdata:`NPY_BIG`, :cdata:`NPY_NATIVE`, :cdata:`NPY_IGNORE` ) are + either equal or equivalent as to their specification of a native + byte order. Thus, on a little-endian machine :cdata:`NPY_LITTLE` + and :cdata:`NPY_NATIVE` are equivalent where they are not + equivalent on a big-endian machine. + + +Converting data types +^^^^^^^^^^^^^^^^^^^^^ + +.. 
cfunction:: PyObject* PyArray_Cast(PyArrayObject* arr, int typenum) + + Mainly for backwards compatibility to the Numeric C-API and for + simple casts to non-flexible types. Return a new array object with + the elements of *arr* cast to the data-type *typenum* which must + be one of the enumerated types and not a flexible type. + +.. cfunction:: PyObject* PyArray_CastToType(PyArrayObject* arr, PyArray_Descr* type, int fortran) + + Return a new array of the *type* specified, casting the elements + of *arr* as appropriate. The fortran argument specifies the + ordering of the output array. + +.. cfunction:: int PyArray_CastTo(PyArrayObject* out, PyArrayObject* in) + + Cast the elements of the array *in* into the array *out*. The + output array should be writeable, have an integer-multiple of the + number of elements in the input array (more than one copy can be + placed in out), and have a data type that is one of the builtin + types. Returns 0 on success and -1 if an error occurs. + +.. cfunction:: PyArray_VectorUnaryFunc* PyArray_GetCastFunc(PyArray_Descr* from, int totype) + + Return the low-level casting function to cast from the given + descriptor to the builtin type number. If no casting function + exists return ``NULL`` and set an error. Using this function + instead of direct access to *from* ->f->cast will allow support of + any user-defined casting functions added to a descriptors casting + dictionary. + +.. cfunction:: int PyArray_CanCastSafely(int fromtype, int totype) + + Returns non-zero if an array of data type *fromtype* can be cast + to an array of data type *totype* without losing information. An + exception is that 64-bit integers are allowed to be cast to 64-bit + floating point values even though this can lose precision on large + integers so as not to proliferate the use of long doubles without + explict requests. Flexible array types are not checked according + to their lengths with this function. + +.. 
cfunction:: int PyArray_CanCastTo(PyArray_Descr* fromtype, PyArray_Descr* totype) + + Returns non-zero if an array of data type *fromtype* (which can + include flexible types) can be cast safely to an array of data + type *totype* (which can include flexible types). This is + basically a wrapper around :cfunc:`PyArray_CanCastSafely` with + additional support for size checking if *fromtype* and *totype* + are :cdata:`NPY_STRING` or :cdata:`NPY_UNICODE`. + +.. cfunction:: int PyArray_ObjectType(PyObject* op, int mintype) + + This function is useful for determining a common type that two or + more arrays can be converted to. It only works for non-flexible + array types as no itemsize information is passed. The *mintype* + argument represents the minimum type acceptable, and *op* + represents the object that will be converted to an array. The + return value is the enumerated typenumber that represents the + data-type that *op* should have. + +.. cfunction:: void PyArray_ArrayType(PyObject* op, PyArray_Descr* mintype, PyArray_Descr* outtype) + + This function works similarly to :cfunc:`PyArray_ObjectType` (...) + except it handles flexible arrays. The *mintype* argument can have + an itemsize member and the *outtype* argument will have an + itemsize member at least as big but perhaps bigger depending on + the object *op*. + +.. cfunction:: PyArrayObject** PyArray_ConvertToCommonType(PyObject* op, int* n) + + Convert a sequence of Python objects contained in *op* to an array + of ndarrays each having the same data type. The type is selected + based on the typenumber (larger type number is chosen over a + smaller one) ignoring objects that are only scalars. The length of + the sequence is returned in *n*, and an *n* -length array of + :ctype:`PyArrayObject` pointers is the return value (or ``NULL`` if an + error occurs). 
The returned array must be freed by the caller of + this routine (using :cfunc:`PyDataMem_FREE` ) and all the array objects + in it ``DECREF`` 'd or a memory-leak will occur. The example + template-code below shows a typical usage: + + .. code-block:: c + + mps = PyArray_ConvertToCommonType(obj, &n); + if (mps==NULL) return NULL; + {code} + <before return> + for (i=0; i<n; i++) Py_DECREF(mps[i]); + PyDataMem_FREE(mps); + {return} + +.. cfunction:: char* PyArray_Zero(PyArrayObject* arr) + + A pointer to newly created memory of size *arr* ->itemsize that + holds the representation of 0 for that type. The returned pointer, + *ret*, **must be freed** using :cfunc:`PyDataMem_FREE` (ret) when it is + not needed anymore. + +.. cfunction:: char* PyArray_One(PyArrayObject* arr) + + A pointer to newly created memory of size *arr* ->itemsize that + holds the representation of 1 for that type. The returned pointer, + *ret*, **must be freed** using :cfunc:`PyDataMem_FREE` (ret) when it + is not needed anymore. + +.. cfunction:: int PyArray_ValidType(int typenum) + + Returns :cdata:`NPY_TRUE` if *typenum* represents a valid type-number + (builtin or user-defined or character code). Otherwise, this + function returns :cdata:`NPY_FALSE`. + + +New data types +^^^^^^^^^^^^^^ + +.. cfunction:: void PyArray_InitArrFuncs(PyArray_ArrFuncs* f) + + Initialize all function pointers and members to ``NULL``. + +.. cfunction:: int PyArray_RegisterDataType(PyArray_Descr* dtype) + + Register a data-type as a new user-defined data type for + arrays. The type must have most of its entries filled in. This is + not always checked and errors can produce segfaults. In + particular, the typeobj member of the ``dtype`` structure must be + filled with a Python type that has a fixed-size element-size that + corresponds to the elsize member of *dtype*. 
Also the ``f`` + member must have the required functions: nonzero, copyswap, + copyswapn, getitem, setitem, and cast (some of the cast functions + may be ``NULL`` if no support is desired). To avoid confusion, you + should choose a unique character typecode but this is not enforced + and not relied on internally. + + A user-defined type number is returned that uniquely identifies + the type. A pointer to the new structure can then be obtained from + :cfunc:`PyArray_DescrFromType` using the returned type number. A -1 is + returned if an error occurs. If this *dtype* has already been + registered (checked only by the address of the pointer), then + return the previously-assigned type-number. + +.. cfunction:: int PyArray_RegisterCastFunc(PyArray_Descr* descr, int totype, PyArray_VectorUnaryFunc* castfunc) + + Register a low-level casting function, *castfunc*, to convert + from the data-type, *descr*, to the given data-type number, + *totype*. Any old casting function is over-written. A ``0`` is + returned on success or a ``-1`` on failure. + +.. cfunction:: int PyArray_RegisterCanCast(PyArray_Descr* descr, int totype, PyArray_SCALARKIND scalar) + + Register the data-type number, *totype*, as castable from + data-type object, *descr*, of the given *scalar* kind. Use + *scalar* = :cdata:`NPY_NOSCALAR` to register that an array of data-type + *descr* can be cast safely to a data-type whose type_number is + *totype*. + + +Special functions for PyArray_OBJECT +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. cfunction:: int PyArray_INCREF(PyArrayObject* op) + + Used for an array, *op*, that contains any Python objects. It + increments the reference count of every object in the array + according to the data-type of *op*. A -1 is returned if an error + occurs, otherwise 0 is returned. + +.. cfunction:: void PyArray_Item_INCREF(char* ptr, PyArray_Descr* dtype) + + A function to INCREF all the objects at the location *ptr* + according to the data-type *dtype*. 
If *ptr* is the start of a + record with an object at any offset, then this will (recursively) + increment the reference count of all object-like items in the + record. + +.. cfunction:: int PyArray_XDECREF(PyArrayObject* op) + + Used for an array, *op*, that contains any Python objects. It + decrements the reference count of every object in the array + according to the data-type of *op*. Normal return value is 0. A + -1 is returned if an error occurs. + +.. cfunction:: void PyArray_Item_XDECREF(char* ptr, PyArray_Descr* dtype) + + A function to XDECREF all the object-like items at the location + *ptr* as recorded in the data-type, *dtype*. This works + recursively so that if ``dtype`` itself has fields with data-types + that contain object-like items, all the object-like fields will be + XDECREF ``'d``. + +.. cfunction:: void PyArray_FillObjectArray(PyArrayObject* arr, PyObject* obj) + + Fill a newly created array with a single value obj at all + locations in the structure with object data-types. No checking is + performed but *arr* must be of data-type :ctype:`PyArray_OBJECT` and be + single-segment and uninitialized (no previous objects in + position). Use :cfunc:`PyArray_DECREF` (*arr*) if you need to + decrement all the items in the object array prior to calling this + function. + + +Array flags +----------- + + +Basic Array Flags +^^^^^^^^^^^^^^^^^ + +An ndarray can have a data segment that is not a simple contiguous +chunk of well-behaved memory you can manipulate. It may not be aligned +with word boundaries (very important on some platforms). It might have +its data in a different byte-order than the machine recognizes. It +might not be writeable. It might be in Fortran-contiguous order. The +array flags are used to indicate what can be said about data +associated with an array. + +.. cvar:: NPY_C_CONTIGUOUS + + The data area is in C-style contiguous order (last index varies the + fastest). + +.. 
cvar:: NPY_F_CONTIGUOUS + + The data area is in Fortran-style contiguous order (first index varies + the fastest). + +.. cvar:: NPY_OWNDATA + + The data area is owned by this array. + +.. cvar:: NPY_ALIGNED + + The data area is aligned appropriately (for all strides). + +.. cvar:: NPY_WRITEABLE + + The data area can be written to. + + Notice that the above 3 flags are defined so that a new, well- + behaved array has these flags defined as true. + +.. cvar:: NPY_UPDATEIFCOPY + + The data area represents a (well-behaved) copy whose information + should be transferred back to the original when this array is deleted. + + +Combinations of array flags +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. cvar:: NPY_BEHAVED + + :cdata:`NPY_ALIGNED` \| :cdata:`NPY_WRITEABLE` + +.. cvar:: NPY_CARRAY + + :cdata:`NPY_C_CONTIGUOUS` \| :cdata:`NPY_BEHAVED` + +.. cvar:: NPY_CARRAY_RO + + :cdata:`NPY_C_CONTIGUOUS` \| :cdata:`NPY_ALIGNED` + +.. cvar:: NPY_FARRAY + + :cdata:`NPY_F_CONTIGUOUS` \| :cdata:`NPY_BEHAVED` + +.. cvar:: NPY_FARRAY_RO + + :cdata:`NPY_F_CONTIGUOUS` \| :cdata:`NPY_ALIGNED` + +.. cvar:: NPY_DEFAULT + + :cdata:`NPY_CARRAY` + +.. cvar:: NPY_UPDATE_ALL + + :cdata:`NPY_C_CONTIGUOUS` \| :cdata:`NPY_F_CONTIGUOUS` \| :cdata:`NPY_ALIGNED` + + +Flag-like constants +^^^^^^^^^^^^^^^^^^^ + +These constants are used in :cfunc:`PyArray_FromAny` (and its macro forms) to +specify desired properties of the new array. + +.. cvar:: NPY_FORCECAST + + Cast to the desired type, even if it can't be done without losing + information. + +.. cvar:: NPY_ENSURECOPY + + Make sure the resulting array is a copy of the original. + +.. cvar:: NPY_ENSUREARRAY + + Make sure the resulting object is an actual ndarray (or bigndarray), + and not a sub-class. + +.. cvar:: NPY_NOTSWAPPED + + Only used in :cfunc:`PyArray_CheckFromAny` to over-ride the byteorder + of the data-type object passed in. + +.. 
cvar:: NPY_BEHAVED_NS + + :cdata:`NPY_ALIGNED` \| :cdata:`NPY_WRITEABLE` \| :cdata:`NPY_NOTSWAPPED` + + +Flag checking +^^^^^^^^^^^^^ + +For all of these macros *arr* must be an instance of a (subclass of) +:cdata:`PyArray_Type`, but no checking is done. + +.. cfunction:: PyArray_CHKFLAGS(arr, flags) + + The first parameter, arr, must be an ndarray or subclass. The + parameter, *flags*, should be an integer consisting of bitwise + combinations of the possible flags an array can have: + :cdata:`NPY_C_CONTIGUOUS`, :cdata:`NPY_F_CONTIGUOUS`, + :cdata:`NPY_OWNDATA`, :cdata:`NPY_ALIGNED`, + :cdata:`NPY_WRITEABLE`, :cdata:`NPY_UPDATEIFCOPY`. + +.. cfunction:: PyArray_ISCONTIGUOUS(arr) + + Evaluates true if *arr* is C-style contiguous. + +.. cfunction:: PyArray_ISFORTRAN(arr) + + Evaluates true if *arr* is Fortran-style contiguous. + +.. cfunction:: PyArray_ISWRITEABLE(arr) + + Evaluates true if the data area of *arr* can be written to. + +.. cfunction:: PyArray_ISALIGNED(arr) + + Evaluates true if the data area of *arr* is properly aligned on + the machine. + +.. cfunction:: PyArray_ISBEHAVED(arr) + + Evaluates true if the data area of *arr* is aligned and writeable + and in machine byte-order according to its descriptor. + +.. cfunction:: PyArray_ISBEHAVED_RO(arr) + + Evaluates true if the data area of *arr* is aligned and in machine + byte-order. + +.. cfunction:: PyArray_ISCARRAY(arr) + + Evaluates true if the data area of *arr* is C-style contiguous, + and :cfunc:`PyArray_ISBEHAVED` (*arr*) is true. + +.. cfunction:: PyArray_ISFARRAY(arr) + + Evaluates true if the data area of *arr* is Fortran-style + contiguous and :cfunc:`PyArray_ISBEHAVED` (*arr*) is true. + +.. cfunction:: PyArray_ISCARRAY_RO(arr) + + Evaluates true if the data area of *arr* is C-style contiguous, + aligned, and in machine byte-order. + +.. cfunction:: PyArray_ISFARRAY_RO(arr) + + Evaluates true if the data area of *arr* is Fortran-style + contiguous, aligned, and in machine byte-order. + +.. 
cfunction:: PyArray_ISONESEGMENT(arr) + + Evaluates true if the data area of *arr* consists of a single + (C-style or Fortran-style) contiguous segment. + +.. cfunction:: void PyArray_UpdateFlags(PyArrayObject* arr, int flagmask) + + The :cdata:`NPY_C_CONTIGUOUS`, :cdata:`NPY_ALIGNED`, and + :cdata:`NPY_F_CONTIGUOUS` array flags can be "calculated" from the + array object itself. This routine updates one or more of these + flags of *arr* as specified in *flagmask* by performing the + required calculation. + + +.. warning:: + + It is important to keep the flags updated (using + :cfunc:`PyArray_UpdateFlags` can help) whenever a manipulation with an + array is performed that might cause them to change. Later + calculations in NumPy that rely on the state of these flags do not + repeat the calculation to update them. + + +Array method alternative API +---------------------------- + + +Conversion +^^^^^^^^^^ + +.. cfunction:: PyObject* PyArray_GetField(PyArrayObject* self, PyArray_Descr* dtype, int offset) + + Equivalent to :meth:`ndarray.getfield` (*self*, *dtype*, *offset*). Return + a new array of the given *dtype* using the data in the current + array at a specified *offset* in bytes. The *offset* plus the + itemsize of the new array type must be less than *self* + ->descr->elsize or an error is raised. The same shape and strides + as the original array are used. Therefore, this function has the + effect of returning a field from a record array. But, it can also + be used to select specific bytes or groups of bytes from any array + type. + +.. cfunction:: int PyArray_SetField(PyArrayObject* self, PyArray_Descr* dtype, int offset, PyObject* val) + + Equivalent to :meth:`ndarray.setfield` (*self*, *val*, *dtype*, *offset* + ). Set the field starting at *offset* in bytes and of the given + *dtype* to *val*. The *offset* plus *dtype* ->elsize must be less + than *self* ->descr->elsize or an error is raised. 
Otherwise, the + *val* argument is converted to an array and copied into the field + pointed to. If necessary, the elements of *val* are repeated to + fill the destination array, But, the number of elements in the + destination must be an integer multiple of the number of elements + in *val*. + +.. cfunction:: PyObject* PyArray_Byteswap(PyArrayObject* self, Bool inplace) + + Equivalent to :meth:`ndarray.byteswap` (*self*, *inplace*). Return an array + whose data area is byteswapped. If *inplace* is non-zero, then do + the byteswap inplace and return a reference to self. Otherwise, + create a byteswapped copy and leave self unchanged. + +.. cfunction:: PyObject* PyArray_NewCopy(PyArrayObject* old, NPY_ORDER order) + + Equivalent to :meth:`ndarray.copy` (*self*, *fortran*). Make a copy of the + *old* array. The returned array is always aligned and writeable + with data interpreted the same as the old array. If *order* is + :cdata:`NPY_CORDER`, then a C-style contiguous array is returned. If + *order* is :cdata:`NPY_FORTRANORDER`, then a Fortran-style contiguous + array is returned. If *order is* :cdata:`NPY_ANYORDER`, then the array + returned is Fortran-style contiguous only if the old one is; + otherwise, it is C-style contiguous. + +.. cfunction:: PyObject* PyArray_ToList(PyArrayObject* self) + + Equivalent to :meth:`ndarray.tolist` (*self*). Return a nested Python list + from *self*. + +.. cfunction:: PyObject* PyArray_ToString(PyArrayObject* self, NPY_ORDER order) + + Equivalent to :meth:`ndarray.tostring` (*self*, *order*). Return the bytes + of this array in a Python string. + +.. cfunction:: PyObject* PyArray_ToFile(PyArrayObject* self, FILE* fp, char* sep, char* format) + + Write the contents of *self* to the file pointer *fp* in C-style + contiguous fashion. Write the data as binary bytes if *sep* is the + string ""or ``NULL``. Otherwise, write the contents of *self* as + text using the *sep* string as the item separator. 
Each item will + be printed to the file. If the *format* string is not ``NULL`` or + "", then it is a Python print statement format string showing how + the items are to be written. + +.. cfunction:: int PyArray_Dump(PyObject* self, PyObject* file, int protocol) + + Pickle the object in *self* to the given *file* (either a string + or a Python file object). If *file* is a Python string it is + considered to be the name of a file which is then opened in binary + mode. The given *protocol* is used (if *protocol* is negative, or + the highest available is used). This is a simple wrapper around + cPickle.dump(*self*, *file*, *protocol*). + +.. cfunction:: PyObject* PyArray_Dumps(PyObject* self, int protocol) + + Pickle the object in *self* to a Python string and return it. Use + the Pickle *protocol* provided (or the highest available if + *protocol* is negative). + +.. cfunction:: int PyArray_FillWithScalar(PyArrayObject* arr, PyObject* obj) + + Fill the array, *arr*, with the given scalar object, *obj*. The + object is first converted to the data type of *arr*, and then + copied into every location. A -1 is returned if an error occurs, + otherwise 0 is returned. + +.. cfunction:: PyObject* PyArray_View(PyArrayObject* self, PyArray_Descr* dtype) + + Equivalent to :meth:`ndarray.view` (*self*, *dtype*). Return a new view of + the array *self* as possibly a different data-type, *dtype*. If + *dtype* is ``NULL``, then the returned array will have the same + data type as *self*. The new data-type must be consistent with + the size of *self*. Either the itemsizes must be identical, or + *self* must be single-segment and the total number of bytes must + be the same. In the latter case the dimensions of the returned + array will be altered in the last (or first for Fortran-style + contiguous arrays) dimension. The data area of the returned array + and self is exactly the same. + + +Shape Manipulation +^^^^^^^^^^^^^^^^^^ + +.. 
cfunction:: PyObject* PyArray_Newshape(PyArrayObject* self, PyArray_Dims* newshape) + + Result will be a new array (pointing to the same memory location + as *self* if possible), but having a shape given by *newshape* + . If the new shape is not compatible with the strides of *self*, + then a copy of the array with the new specified shape will be + returned. + +.. cfunction:: PyObject* PyArray_Reshape(PyArrayObject* self, PyObject* shape) + + Equivalent to :meth:`ndarray.reshape` (*self*, *shape*) where *shape* is a + sequence. Converts *shape* to a :ctype:`PyArray_Dims` structure and + calls :cfunc:`PyArray_Newshape` internally. + +.. cfunction:: PyObject* PyArray_Squeeze(PyArrayObject* self) + + Equivalent to :meth:`ndarray.squeeze` (*self*). Return a new view of *self* + with all of the dimensions of length 1 removed from the shape. + +.. warning:: + + matrix objects are always 2-dimensional. Therefore, + :cfunc:`PyArray_Squeeze` has no effect on arrays of matrix sub-class. + +.. cfunction:: PyObject* PyArray_SwapAxes(PyArrayObject* self, int a1, int a2) + + Equivalent to :meth:`ndarray.swapaxes` (*self*, *a1*, *a2*). The returned + array is a new view of the data in *self* with the given axes, + *a1* and *a2*, swapped. + +.. cfunction:: PyObject* PyArray_Resize(PyArrayObject* self, PyArray_Dims* newshape, int refcheck, NPY_ORDER fortran) + + Equivalent to :meth:`ndarray.resize` (*self*, *newshape*, refcheck + ``=`` *refcheck*, order= fortran ). This function only works on + single-segment arrays. It changes the shape of *self* inplace and + will reallocate the memory for *self* if *newshape* has a + different total number of elements then the old shape. If + reallocation is necessary, then *self* must own its data, have + *self* - ``>base==NULL``, have *self* - ``>weakrefs==NULL``, and + (unless refcheck is 0) not be referenced by any other array. A + reference to the new array is returned. 
The fortran argument can + be :cdata:`NPY_ANYORDER`, :cdata:`NPY_CORDER`, or + :cdata:`NPY_FORTRANORDER`. This argument is used if the number of + dimension is (or is being resized to be) greater than 2. It + currently has no effect. Eventually it could be used to determine + how the resize operation should view the data when constructing a + differently-dimensioned array. + +.. cfunction:: PyObject* PyArray_Transpose(PyArrayObject* self, PyArray_Dims* permute) + + Equivalent to :meth:`ndarray.transpose` (*self*, *permute*). Permute the + axes of the ndarray object *self* according to the data structure + *permute* and return the result. If *permute* is ``NULL``, then + the resulting array has its axes reversed. For example if *self* + has shape :math:`10\times20\times30`, and *permute* ``.ptr`` is + (0,2,1) the shape of the result is :math:`10\times30\times20.` If + *permute* is ``NULL``, the shape of the result is + :math:`30\times20\times10.` + +.. cfunction:: PyObject* PyArray_Flatten(PyArrayObject* self, NPY_ORDER order) + + Equivalent to :meth:`ndarray.flatten` (*self*, *order*). Return a 1-d copy + of the array. If *order* is :cdata:`NPY_FORTRANORDER` the elements are + scanned out in Fortran order (first-dimension varies the + fastest). If *order* is :cdata:`NPY_CORDER`, the elements of ``self`` + are scanned in C-order (last dimension varies the fastest). If + *order* :cdata:`NPY_ANYORDER`, then the result of + :cfunc:`PyArray_ISFORTRAN` (*self*) is used to determine which order + to flatten. + +.. cfunction:: PyObject* PyArray_Ravel(PyArrayObject* self, NPY_ORDER order) + + Equivalent to *self*.ravel(*order*). Same basic functionality + as :cfunc:`PyArray_Flatten` (*self*, *order*) except if *order* is 0 + and *self* is C-style contiguous, the shape is altered but no copy + is performed. + + +Item selection and manipulation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
cfunction:: PyObject* PyArray_TakeFrom(PyArrayObject* self, PyObject* indices, int axis, PyArrayObject* ret, NPY_CLIPMODE clipmode) + + Equivalent to :meth:`ndarray.take` (*self*, *indices*, *axis*, *ret*, + *clipmode*) except *axis* =None in Python is obtained by setting + *axis* = :cdata:`NPY_MAXDIMS` in C. Extract the items from self + indicated by the integer-valued *indices* along the given *axis.* + The clipmode argument can be :cdata:`NPY_RAISE`, :cdata:`NPY_WRAP`, or + :cdata:`NPY_CLIP` to indicate what to do with out-of-bound indices. The + *ret* argument can specify an output array rather than having one + created internally. + +.. cfunction:: PyObject* PyArray_PutTo(PyArrayObject* self, PyObject* values, PyObject* indices, NPY_CLIPMODE clipmode) + + Equivalent to *self*.put(*values*, *indices*, *clipmode* + ). Put *values* into *self* at the corresponding (flattened) + *indices*. If *values* is too small it will be repeated as + necessary. + +.. cfunction:: PyObject* PyArray_PutMask(PyArrayObject* self, PyObject* values, PyObject* mask) + + Place the *values* in *self* wherever corresponding positions + (using a flattened context) in *mask* are true. The *mask* and + *self* arrays must have the same total number of elements. If + *values* is too small, it will be repeated as necessary. + +.. cfunction:: PyObject* PyArray_Repeat(PyArrayObject* self, PyObject* op, int axis) + + Equivalent to :meth:`ndarray.repeat` (*self*, *op*, *axis*). Copy the + elements of *self*, *op* times along the given *axis*. Either + *op* is a scalar integer or a sequence of length *self* + ->dimensions[ *axis* ] indicating how many times to repeat each + item along the axis. + +.. cfunction:: PyObject* PyArray_Choose(PyArrayObject* self, PyObject* op, PyArrayObject* ret, NPY_CLIPMODE clipmode) + + Equivalent to :meth:`ndarray.choose` (*self*, *op*, *ret*, *clipmode*). 
+ Create a new array by selecting elements from the sequence of + arrays in *op* based on the integer values in *self*. The arrays + must all be broadcastable to the same shape and the entries in + *self* should be between 0 and len(*op*). The output is placed + in *ret* unless it is ``NULL`` in which case a new output is + created. The *clipmode* argument determines behavior for when + entries in *self* are not between 0 and len(*op*). + + .. cvar:: NPY_RAISE + + raise a ValueError; + + .. cvar:: NPY_WRAP + + wrap values < 0 by adding len(*op*) and values >=len(*op*) + by subtracting len(*op*) until they are in range; + + .. cvar:: NPY_CLIP + + all values are clipped to the region [0, len(*op*) ). + + +.. cfunction:: PyObject* PyArray_Sort(PyArrayObject* self, int axis) + + Equivalent to :meth:`ndarray.sort` (*self*, *axis*). Return an array with + the items of *self* sorted along *axis*. + +.. cfunction:: PyObject* PyArray_ArgSort(PyArrayObject* self, int axis) + + Equivalent to :meth:`ndarray.argsort` (*self*, *axis*). Return an array of + indices such that selection of these indices along the given + ``axis`` would return a sorted version of *self*. If *self* + ->descr is a data-type with fields defined, then + self->descr->names is used to determine the sort order. A + comparison where the first field is equal will use the second + field and so on. To alter the sort order of a record array, create + a new data-type with a different order of names and construct a + view of the array with that new data-type. + +.. cfunction:: PyObject* PyArray_LexSort(PyObject* sort_keys, int axis) + + Given a sequence of arrays (*sort_keys*) of the same shape, + return an array of indices (similar to :cfunc:`PyArray_ArgSort` (...)) + that would sort the arrays lexicographically. A lexicographic sort + specifies that when two keys are found to be equal, the order is + based on comparison of subsequent keys. 
A merge sort (which leaves + equal entries unmoved) is required to be defined for the + types. The sort is accomplished by sorting the indices first using + the first *sort_key* and then using the second *sort_key* and so + forth. This is equivalent to the lexsort(*sort_keys*, *axis*) + Python command. Because of the way the merge-sort works, be sure + to understand the order the *sort_keys* must be in (reversed from + the order you would use when comparing two elements). + + If these arrays are all collected in a record array, then + :cfunc:`PyArray_Sort` (...) can also be used to sort the array + directly. + +.. cfunction:: PyObject* PyArray_SearchSorted(PyArrayObject* self, PyObject* values) + + Equivalent to :meth:`ndarray.searchsorted` (*self*, *values*). Assuming + *self* is a 1-d array in ascending order representing bin + boundaries then the output is an array the same shape as *values* + of bin numbers, giving the bin into which each item in *values* + would be placed. No checking is done on whether or not self is in + ascending order. + +.. cfunction:: PyObject* PyArray_Diagonal(PyArrayObject* self, int offset, int axis1, int axis2) + + Equivalent to :meth:`ndarray.diagonal` (*self*, *offset*, *axis1*, *axis2* + ). Return the *offset* diagonals of the 2-d arrays defined by + *axis1* and *axis2*. + +.. cfunction:: PyObject* PyArray_Nonzero(PyArrayObject* self) + + Equivalent to :meth:`ndarray.nonzero` (*self*). Returns a tuple of index + arrays that select elements of *self* that are nonzero. If (nd= + :cfunc:`PyArray_NDIM` ( ``self`` ))==1, then a single index array is + returned. The index arrays have data type :cdata:`NPY_INTP`. If a + tuple is returned (nd :math:`\neq` 1), then its length is nd. + +.. cfunction:: PyObject* PyArray_Compress(PyArrayObject* self, PyObject* condition, int axis, PyArrayObject* out) + + Equivalent to :meth:`ndarray.compress` (*self*, *condition*, *axis* + ). 
Return the elements along *axis* corresponding to elements of + *condition* that are true. + + +Calculation +^^^^^^^^^^^ + +.. tip:: + + Pass in :cdata:`NPY_MAXDIMS` for axis in order to achieve the same + effect that is obtained by passing in *axis* = :const:`None` in Python + (treating the array as a 1-d array). + +.. cfunction:: PyObject* PyArray_ArgMax(PyArrayObject* self, int axis) + + Equivalent to :meth:`ndarray.argmax` (*self*, *axis*). Return the index of + the largest element of *self* along *axis*. + +.. cfunction:: PyObject* PyArray_ArgMin(PyArrayObject* self, int axis) + + Equivalent to :meth:`ndarray.argmin` (*self*, *axis*). Return the index of + the smallest element of *self* along *axis*. + +.. cfunction:: PyObject* PyArray_Max(PyArrayObject* self, int axis, PyArrayObject* out) + + Equivalent to :meth:`ndarray.max` (*self*, *axis*). Return the largest + element of *self* along the given *axis*. + +.. cfunction:: PyObject* PyArray_Min(PyArrayObject* self, int axis, PyArrayObject* out) + + Equivalent to :meth:`ndarray.min` (*self*, *axis*). Return the smallest + element of *self* along the given *axis*. + +.. cfunction:: PyObject* PyArray_Ptp(PyArrayObject* self, int axis, PyArrayObject* out) + + Equivalent to :meth:`ndarray.ptp` (*self*, *axis*). Return the difference + between the largest element of *self* along *axis* and the + smallest element of *self* along *axis*. + + + +.. note:: + + The rtype argument specifies the data-type the reduction should + take place over. This is important if the data-type of the array + is not "large" enough to handle the output. By default, all + integer data-types are made at least as large as :cdata:`NPY_LONG` + for the "add" and "multiply" ufuncs (which form the basis for + mean, sum, cumsum, prod, and cumprod functions). + +.. cfunction:: PyObject* PyArray_Mean(PyArrayObject* self, int axis, int rtype, PyArrayObject* out) + + Equivalent to :meth:`ndarray.mean` (*self*, *axis*, *rtype*). 
Returns the + mean of the elements along the given *axis*, using the enumerated + type *rtype* as the data type to sum in. Default sum behavior is + obtained using :cdata:`PyArray_NOTYPE` for *rtype*. + +.. cfunction:: PyObject* PyArray_Trace(PyArrayObject* self, int offset, int axis1, int axis2, int rtype, PyArrayObject* out) + + Equivalent to :meth:`ndarray.trace` (*self*, *offset*, *axis1*, *axis2*, + *rtype*). Return the sum (using *rtype* as the data type of + summation) over the *offset* diagonal elements of the 2-d arrays + defined by *axis1* and *axis2* variables. A positive offset + chooses diagonals above the main diagonal. A negative offset + selects diagonals below the main diagonal. + +.. cfunction:: PyObject* PyArray_Clip(PyArrayObject* self, PyObject* min, PyObject* max) + + Equivalent to :meth:`ndarray.clip` (*self*, *min*, *max*). Clip an array, + *self*, so that values larger than *max* are fixed to *max* and + values less than *min* are fixed to *min*. + +.. cfunction:: PyObject* PyArray_Conjugate(PyArrayObject* self) + + Equivalent to :meth:`ndarray.conjugate` (*self*). + Return the complex conjugate of *self*. If *self* is not of + complex data type, then return *self* with a new reference. + +.. cfunction:: PyObject* PyArray_Round(PyArrayObject* self, int decimals, PyArrayObject* out) + + Equivalent to :meth:`ndarray.round` (*self*, *decimals*, *out*). Returns + the array with elements rounded to the nearest decimal place. The + decimal place is defined as the :math:`10^{-\textrm{decimals}}` + digit so that negative *decimals* cause rounding to the nearest 10's, 100's, etc. If out is ``NULL``, then the output array is created, otherwise the output is placed in *out* which must be the correct size and type. + +.. cfunction:: PyObject* PyArray_Std(PyArrayObject* self, int axis, int rtype, PyArrayObject* out) + + Equivalent to :meth:`ndarray.std` (*self*, *axis*, *rtype*).
Return the + standard deviation using data along *axis* converted to data type + *rtype*. + +.. cfunction:: PyObject* PyArray_Sum(PyArrayObject* self, int axis, int rtype, PyArrayObject* out) + + Equivalent to :meth:`ndarray.sum` (*self*, *axis*, *rtype*). Return 1-d + vector sums of elements in *self* along *axis*. Perform the sum + after converting data to data type *rtype*. + +.. cfunction:: PyObject* PyArray_CumSum(PyArrayObject* self, int axis, int rtype, PyArrayObject* out) + + Equivalent to :meth:`ndarray.cumsum` (*self*, *axis*, *rtype*). Return + cumulative 1-d sums of elements in *self* along *axis*. Perform + the sum after converting data to data type *rtype*. + +.. cfunction:: PyObject* PyArray_Prod(PyArrayObject* self, int axis, int rtype, PyArrayObject* out) + + Equivalent to :meth:`ndarray.prod` (*self*, *axis*, *rtype*). Return 1-d + products of elements in *self* along *axis*. Perform the product + after converting data to data type *rtype*. + +.. cfunction:: PyObject* PyArray_CumProd(PyArrayObject* self, int axis, int rtype, PyArrayObject* out) + + Equivalent to :meth:`ndarray.cumprod` (*self*, *axis*, *rtype*). Return + 1-d cumulative products of elements in ``self`` along ``axis``. + Perform the product after converting data to data type ``rtype``. + +.. cfunction:: PyObject* PyArray_All(PyArrayObject* self, int axis, PyArrayObject* out) + + Equivalent to :meth:`ndarray.all` (*self*, *axis*). Return an array with + True elements for every 1-d sub-array of ``self`` defined by + ``axis`` in which all the elements are True. + +.. cfunction:: PyObject* PyArray_Any(PyArrayObject* self, int axis, PyArrayObject* out) + + Equivalent to :meth:`ndarray.any` (*self*, *axis*). Return an array with + True elements for every 1-d sub-array of *self* defined by *axis* + in which any of the elements are True. + +Functions +--------- + + +Array Functions +^^^^^^^^^^^^^^^ + +.. 
cfunction:: int PyArray_AsCArray(PyObject** op, void* ptr, npy_intp* dims, int nd, int typenum, int itemsize) + + Sometimes it is useful to access a multidimensional array as a + C-style multi-dimensional array so that algorithms can be + implemented using C's a[i][j][k] syntax. This routine returns a + pointer, *ptr*, that simulates this kind of C-style array, for + 1-, 2-, and 3-d ndarrays. + + :param op: + + The address to any Python object. This Python object will be replaced + with an equivalent well-behaved, C-style contiguous, ndarray of the + given data type specified by the last two arguments. Be sure that + stealing a reference in this way to the input object is justified. + + :param ptr: + + The address to a (ctype* for 1-d, ctype** for 2-d or ctype*** for 3-d) + variable where ctype is the equivalent C-type for the data type. On + return, *ptr* will be addressable as a 1-d, 2-d, or 3-d array. + + :param dims: + + An output array that contains the shape of the array object. This + array gives boundaries on any looping that will take place. + + :param nd: + + The dimensionality of the array (1, 2, or 3). + + :param typenum: + + The expected data type of the array. + + :param itemsize: + + This argument is only needed when *typenum* represents a + flexible array. Otherwise it should be 0. + +.. note:: + + The simulation of a C-style array is not complete for 2-d and 3-d + arrays. For example, the simulated arrays of pointers cannot be passed + to subroutines expecting specific, statically-defined 2-d and 3-d + arrays. To pass to functions requiring those kinds of inputs, you must + statically define the required array and copy data. + +.. cfunction:: int PyArray_Free(PyObject* op, void* ptr) + + Must be called with the same objects and memory locations returned + from :cfunc:`PyArray_AsCArray` (...). This function cleans up memory + that otherwise would get leaked. + +..
cfunction:: PyObject* PyArray_Concatenate(PyObject* obj, int axis) + + Join the sequence of objects in *obj* together along *axis* into a + single array. If the dimensions or types are not compatible an + error is raised. + +.. cfunction:: PyObject* PyArray_InnerProduct(PyObject* obj1, PyObject* obj2) + + Compute a product-sum over the last dimensions of *obj1* and + *obj2*. Neither array is conjugated. + +.. cfunction:: PyObject* PyArray_MatrixProduct(PyObject* obj1, PyObject* obj2) + + Compute a product-sum over the last dimension of *obj1* and the + second-to-last dimension of *obj2*. For 2-d arrays this is a + matrix-product. Neither array is conjugated. + +.. cfunction:: PyObject* PyArray_CopyAndTranspose(PyObject \* op) + + A specialized copy and transpose function that works only for 2-d + arrays. The returned array is a transposed copy of *op*. + +.. cfunction:: PyObject* PyArray_Correlate(PyObject* op1, PyObject* op2, int mode) + + Compute the 1-d correlation of the 1-d arrays *op1* and *op2* + . The correlation is computed at each output point by multiplying + *op1* by a shifted version of *op2* and summing the result. As a + result of the shift, needed values outside of the defined range of + *op1* and *op2* are interpreted as zero. The mode determines how + many shifts to return: 0 - return only shifts that did not need to + assume zero values; 1 - return an object that is the same size as + *op1*, 2 - return all possible shifts (any overlap at all is + accepted). + +.. cfunction:: PyObject* PyArray_Where(PyObject* condition, PyObject* x, PyObject* y) + + If both ``x`` and ``y`` are ``NULL``, then return + :cfunc:`PyArray_Nonzero` (*condition*). Otherwise, both *x* and *y* + must be given and the object returned is shaped like *condition* + and has elements of *x* and *y* where *condition* is respectively + True or False. + + +Other functions +^^^^^^^^^^^^^^^ + +..
cfunction:: Bool PyArray_CheckStrides(int elsize, int nd, npy_intp numbytes, npy_intp* dims, npy_intp* newstrides) + + Determine if *newstrides* is a strides array consistent with the + memory of an *nd* -dimensional array with shape ``dims`` and + element-size, *elsize*. The *newstrides* array is checked to see + if jumping by the provided number of bytes in each direction will + ever mean jumping more than *numbytes* which is the assumed size + of the available memory segment. If *numbytes* is 0, then an + equivalent *numbytes* is computed assuming *nd*, *dims*, and + *elsize* refer to a single-segment array. Return :cdata:`NPY_TRUE` if + *newstrides* is acceptable, otherwise return :cdata:`NPY_FALSE`. + +.. cfunction:: npy_intp PyArray_MultiplyList(npy_intp* seq, int n) + +.. cfunction:: int PyArray_MultiplyIntList(int* seq, int n) + + Both of these routines multiply an *n* -length array, *seq*, of + integers and return the result. No overflow checking is performed. + +.. cfunction:: int PyArray_CompareLists(npy_intp* l1, npy_intp* l2, int n) + + Given two *n* -length arrays of integers, *l1*, and *l2*, return + 1 if the lists are identical; otherwise, return 0. + + +Array Iterators +--------------- + +An array iterator is a simple way to access the elements of an +N-dimensional array quickly and efficiently. Section `2 +<#sec-array-iterator>`__ provides more description and examples of +this useful approach to looping over an array. + +.. cfunction:: PyObject* PyArray_IterNew(PyObject* arr) + + Return an array iterator object from the array, *arr*. This is + equivalent to *arr*. **flat**. The array iterator object makes + it easy to loop over an N-dimensional non-contiguous array in + C-style contiguous fashion. + +.. cfunction:: PyObject* PyArray_IterAllButAxis(PyObject* arr, int \*axis) + + Return an array iterator that will iterate over all axes but the + one provided in *\*axis*. The returned iterator cannot be used + with :cfunc:`PyArray_ITER_GOTO1D`. 
This iterator could be used to + write something similar to what ufuncs do wherein the loop over + the largest axis is done by a separate sub-routine. If *\*axis* is + negative then *\*axis* will be set to the axis having the smallest + stride and that axis will be used. + +.. cfunction:: PyObject *PyArray_BroadcastToShape(PyObject* arr, npy_intp *dimensions, int nd) + + Return an array iterator that is broadcast to iterate as an array + of the shape provided by *dimensions* and *nd*. + +.. cfunction:: int PyArrayIter_Check(PyObject* op) + + Evaluates true if *op* is an array iterator (or instance of a + subclass of the array iterator type). + +.. cfunction:: void PyArray_ITER_RESET(PyObject* iterator) + + Reset an *iterator* to the beginning of the array. + +.. cfunction:: void PyArray_ITER_NEXT(PyObject* iterator) + + Increment the index and the dataptr members of the *iterator* to + point to the next element of the array. If the array is not + (C-style) contiguous, also increment the N-dimensional coordinates + array. + +.. cfunction:: void *PyArray_ITER_DATA(PyObject* iterator) + + A pointer to the current element of the array. + +.. cfunction:: void PyArray_ITER_GOTO(PyObject* iterator, npy_intp* destination) + + Set the *iterator* index, dataptr, and coordinates members to the + location in the array indicated by the N-dimensional c-array, + *destination*, which must have size at least *iterator* + ->nd_m1+1. + +.. cfunction:: PyArray_ITER_GOTO1D(PyObject* iterator, npy_intp index) + + Set the *iterator* index and dataptr to the location in the array + indicated by the integer *index* which points to an element in the + C-styled flattened array. + +.. cfunction:: int PyArray_ITER_NOTDONE(PyObject* iterator) + + Evaluates TRUE as long as the iterator has not looped through all of + the elements, otherwise it evaluates FALSE. + + +Broadcasting (multi-iterators) +------------------------------ + +.. cfunction:: PyObject* PyArray_MultiIterNew(int num, ...)
+ + A simplified interface to broadcasting. This function takes the + number of arrays to broadcast and then *num* extra ( :ctype:`PyObject *` + ) arguments. These arguments are converted to arrays and iterators + are created. :cfunc:`PyArray_Broadcast` is then called on the resulting + multi-iterator object. The resulting, broadcasted multi-iterator + object is then returned. A broadcasted operation can then be + performed using a single loop and using :cfunc:`PyArray_MultiIter_NEXT` + (...) + +.. cfunction:: void PyArray_MultiIter_RESET(PyObject* multi) + + Reset all the iterators to the beginning in a multi-iterator + object, *multi*. + +.. cfunction:: void PyArray_MultiIter_NEXT(PyObject* multi) + + Advance each iterator in a multi-iterator object, *multi*, to its + next (broadcasted) element. + +.. cfunction:: void *PyArray_MultiIter_DATA(PyObject* multi, int i) + + Return the data-pointer of the *i* :math:`^{\textrm{th}}` iterator + in a multi-iterator object. + +.. cfunction:: void PyArray_MultiIter_NEXTi(PyObject* multi, int i) + + Advance the pointer of only the *i* :math:`^{\textrm{th}}` iterator. + +.. cfunction:: void PyArray_MultiIter_GOTO(PyObject* multi, npy_intp* destination) + + Advance each iterator in a multi-iterator object, *multi*, to the + given :math:`N` -dimensional *destination* where :math:`N` is the + number of dimensions in the broadcasted array. + +.. cfunction:: void PyArray_MultiIter_GOTO1D(PyObject* multi, npy_intp index) + + Advance each iterator in a multi-iterator object, *multi*, to the + corresponding location of the *index* into the flattened + broadcasted array. + +.. cfunction:: int PyArray_MultiIter_NOTDONE(PyObject* multi) + + Evaluates TRUE as long as the multi-iterator has not looped + through all of the elements (of the broadcasted result), otherwise + it evaluates FALSE. + +.. cfunction:: int PyArray_Broadcast(PyArrayMultiIterObject* mit) + + This function encapsulates the broadcasting rules.
The *mit* + container should already contain iterators for all the arrays that + need to be broadcast. On return, these iterators will be adjusted + so that iteration over each simultaneously will accomplish the + broadcasting. A negative number is returned if an error occurs. + +.. cfunction:: int PyArray_RemoveSmallest(PyArrayMultiIterObject* mit) + + This function takes a multi-iterator object that has been + previously "broadcasted," finds the dimension with the smallest + "sum of strides" in the broadcasted result and adapts all the + iterators so as not to iterate over that dimension (by effectively + making them of length-1 in that dimension). The corresponding + dimension is returned unless *mit* ->nd is 0, then -1 is + returned. This function is useful for constructing ufunc-like + routines that broadcast their inputs correctly and then call a + strided 1-d version of the routine as the inner-loop. This 1-d + version is usually optimized for speed and for this reason the + loop should be performed over the axis that won't require large + stride jumps. + + +Array Scalars +------------- + +.. cfunction:: PyObject* PyArray_Return(PyArrayObject* arr) + + This function checks to see if *arr* is a 0-dimensional array and, + if so, returns the appropriate array scalar. It should be used + whenever 0-dimensional arrays could be returned to Python. + +.. cfunction:: PyObject* PyArray_Scalar(void* data, PyArray_Descr* dtype, PyObject* itemsize) + + Return an array scalar object of the given enumerated *typenum* + and *itemsize* by **copying** from memory pointed to by *data* + . If *swap* is nonzero then this function will byteswap the data + if appropriate to the data-type because array scalars are always + in correct machine-byte order. + +.. 
cfunction:: PyObject* PyArray_ToScalar(void* data, PyArrayObject* arr) + + Return an array scalar object of the type and itemsize indicated + by the array object *arr* copied from the memory pointed to by + *data* and swapping if the data in *arr* is not in machine + byte-order. + +.. cfunction:: PyObject* PyArray_FromScalar(PyObject* scalar, PyArray_Descr* outcode) + + Return a 0-dimensional array of type determined by *outcode* from + *scalar* which should be an array-scalar object. If *outcode* is + NULL, then the type is determined from *scalar*. + +.. cfunction:: void PyArray_ScalarAsCtype(PyObject* scalar, void* ctypeptr) + + Return in *ctypeptr* a pointer to the actual value in an array + scalar. There is no error checking so *scalar* must be an + array-scalar object, and ctypeptr must have enough space to hold + the correct type. For flexible-sized types, a pointer to the data + is copied into the memory of *ctypeptr*, for all other types, the + actual data is copied into the address pointed to by *ctypeptr*. + +.. cfunction:: void PyArray_CastScalarToCtype(PyObject* scalar, void* ctypeptr, PyArray_Descr* outcode) + + Return the data (cast to the data type indicated by *outcode*) + from the array-scalar, *scalar*, into the memory pointed to by + *ctypeptr* (which must be large enough to handle the incoming + memory). + +.. cfunction:: PyObject* PyArray_TypeObjectFromType(int type) + + Returns a scalar type-object from a type-number, *type* + . Equivalent to :cfunc:`PyArray_DescrFromType` (*type*)->typeobj + except for reference counting and error-checking. Returns a new + reference to the typeobject on success or ``NULL`` on failure. + +.. cfunction:: NPY_SCALARKIND PyArray_ScalarKind(int typenum, PyArrayObject** arr) + + Return the kind of scalar represented by *typenum* and the array + in *\*arr* (if *arr* is not ``NULL`` ). The array is assumed to be + rank-0 and only used if *typenum* represents a signed integer. 
If + *arr* is not ``NULL`` and the first element is negative then + :cdata:`NPY_INTNEG_SCALAR` is returned, otherwise + :cdata:`NPY_INTPOS_SCALAR` is returned. The possible return values + are :cdata:`NPY_{kind}_SCALAR` where ``{kind}`` can be **INTPOS**, + **INTNEG**, **FLOAT**, **COMPLEX**, **BOOL**, or **OBJECT**. + :cdata:`NPY_NOSCALAR` is also an enumerated value + :ctype:`NPY_SCALARKIND` variables can take on. + +.. cfunction:: int PyArray_CanCoerceScalar(char thistype, char neededtype, NPY_SCALARKIND scalar) + + Implements the rules for scalar coercion. Scalars are only + silently coerced from thistype to neededtype if this function + returns nonzero. If scalar is :cdata:`NPY_NOSCALAR`, then this + function is equivalent to :cfunc:`PyArray_CanCastSafely`. The rule is + that scalars of the same KIND can be coerced into arrays of the + same KIND. This rule means that high-precision scalars will never + cause low-precision arrays of the same KIND to be upcast. + + +Data-type descriptors +--------------------- + + + +.. warning:: + + Data-type objects must be reference counted so be aware of the + action on the data-type reference of different C-API calls. The + standard rule is that when a data-type object is returned it is a + new reference. Functions that take :ctype:`PyArray_Descr *` objects and + return arrays steal references to the data-type their inputs + unless otherwise noted. Therefore, you must own a reference to any + data-type object used as input to such a function. + +.. cfunction:: int PyArrayDescr_Check(PyObject* obj) + + Evaluates as true if *obj* is a data-type object ( :ctype:`PyArray_Descr *` ). + +.. cfunction:: PyArray_Descr* PyArray_DescrNew(PyArray_Descr* obj) + + Return a new data-type object copied from *obj* (the fields + reference is just updated so that the new object points to the + same fields dictionary if any). + +.. 
cfunction:: PyArray_Descr* PyArray_DescrNewFromType(int typenum) + + Create a new data-type object from the built-in (or + user-registered) data-type indicated by *typenum*. All builtin + types should not have any of their fields changed. This creates a + new copy of the :ctype:`PyArray_Descr` structure so that you can fill + it in as appropriate. This function is especially needed for + flexible data-types which need to have a new elsize member in + order to be meaningful in array construction. + +.. cfunction:: PyArray_Descr* PyArray_DescrNewByteorder(PyArray_Descr* obj, char newendian) + + Create a new data-type object with the byteorder set according to + *newendian*. All referenced data-type objects (in subdescr and + fields members of the data-type object) are also changed + (recursively). If a byteorder of :cdata:`NPY_IGNORE` is encountered it + is left alone. If newendian is :cdata:`NPY_SWAP`, then all byte-orders + are swapped. Other valid newendian values are :cdata:`NPY_NATIVE`, + :cdata:`NPY_LITTLE`, and :cdata:`NPY_BIG` which all cause the returned + data-type descriptor (and all its + referenced data-type descriptors) to have the corresponding byte- + order. + +.. cfunction:: PyArray_Descr* PyArray_DescrFromObject(PyObject* op, PyArray_Descr* mintype) + + Determine an appropriate data-type object from the object *op* + (which should be a "nested" sequence object) and the minimum + data-type descriptor mintype (which can be ``NULL`` ). Similar in + behavior to array(*op*).dtype. Don't confuse this function with + :cfunc:`PyArray_DescrConverter`. This function essentially looks at + all the objects in the (nested) sequence and determines the + data-type from the elements it finds. + +.. cfunction:: PyArray_Descr* PyArray_DescrFromScalar(PyObject* scalar) + + Return a data-type object from an array-scalar object. No checking + is done to be sure that *scalar* is an array scalar.
If no + suitable data-type can be determined, then a data-type of + :cdata:`NPY_OBJECT` is returned by default. + +.. cfunction:: PyArray_Descr* PyArray_DescrFromType(int typenum) + + Returns a data-type object corresponding to *typenum*. The + *typenum* can be one of the enumerated types, a character code for + one of the enumerated types, or a user-defined type. + +.. cfunction:: int PyArray_DescrConverter(PyObject* obj, PyArray_Descr** dtype) + + Convert any compatible Python object, *obj*, to a data-type object + in *dtype*. A large number of Python objects can be converted to + data-type objects. See :ref:`arrays.dtypes` for a complete + description. This version of the converter converts None objects + to a :cdata:`NPY_DEFAULT_TYPE` data-type object. This function can + be used with the "O&" character code in :cfunc:`PyArg_ParseTuple` + processing. + +.. cfunction:: int PyArray_DescrConverter2(PyObject* obj, PyArray_Descr** dtype) + + Convert any compatible Python object, *obj*, to a data-type + object in *dtype*. This version of the converter converts None + objects so that the returned data-type is ``NULL``. This function + can also be used with the "O&" character in PyArg_ParseTuple + processing. + +.. cfunction:: int PyArray_DescrAlignConverter(PyObject* obj, PyArray_Descr** dtype) + + Like :cfunc:`PyArray_DescrConverter` except it aligns C-struct-like + objects on word-boundaries as the compiler would. + +.. cfunction:: int PyArray_DescrAlignConverter2(PyObject* obj, PyArray_Descr** dtype) + + Like :cfunc:`PyArray_DescrConverter2` except it aligns C-struct-like + objects on word-boundaries as the compiler would. + +.. cfunction:: PyObject *PyArray_FieldNames(PyObject* dict) + + Take the fields dictionary, *dict*, such as the one attached to a + data-type object and construct an ordered-list of field names such + as is stored in the names field of the :ctype:`PyArray_Descr` object.
+ + +Conversion Utilities +-------------------- + + +For use with :cfunc:`PyArg_ParseTuple` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +All of these functions can be used in :cfunc:`PyArg_ParseTuple` (...) with +the "O&" format specifier to automatically convert any Python object +to the required C-object. All of these functions return +:cdata:`NPY_SUCCEED` if successful and :cdata:`NPY_FAIL` if not. The first +argument to all of these functions is a Python object. The second +argument is the **address** of the C-type to convert the Python object +to. + + +.. warning:: + + Be sure to understand what steps you should take to manage the + memory when using these conversion functions. These functions can + require freeing memory, and/or altering the reference counts of + specific objects based on your use. + +.. cfunction:: int PyArray_Converter(PyObject* obj, PyObject** address) + + Convert any Python object to a :ctype:`PyArrayObject`. If + :cfunc:`PyArray_Check` (*obj*) is TRUE then its reference count is + incremented and a reference placed in *address*. If *obj* is not + an array, then convert it to an array using :cfunc:`PyArray_FromAny` + . No matter what is returned, you must DECREF the object returned + by this routine in *address* when you are done with it. + +.. cfunction:: int PyArray_OutputConverter(PyObject* obj, PyArrayObject** address) + + This is a default converter for output arrays given to + functions. If *obj* is :cdata:`Py_None` or ``NULL``, then *\*address* + will be ``NULL`` but the call will succeed. If :cfunc:`PyArray_Check` ( + *obj*) is TRUE then it is returned in *\*address* without + incrementing its reference count. + +.. cfunction:: int PyArray_IntpConverter(PyObject* obj, PyArray_Dims* seq) + + Convert any Python sequence, *obj*, smaller than :cdata:`NPY_MAXDIMS` + to a C-array of :ctype:`npy_intp`. The Python object could also be a + single number. The *seq* variable is a pointer to a structure with + members ptr and len.
On successful return, *seq* ->ptr contains a + pointer to memory that must be freed to avoid a memory leak. The + restriction on memory size allows this converter to be + conveniently used for sequences intended to be interpreted as + array shapes. + +.. cfunction:: int PyArray_BufferConverter(PyObject* obj, PyArray_Chunk* buf) + + Convert any Python object, *obj*, with a (single-segment) buffer + interface to a variable with members that detail the object's use + of its chunk of memory. The *buf* variable is a pointer to a + structure with base, ptr, len, and flags members. The + :ctype:`PyArray_Chunk` structure is binary compatible with the + Python's buffer object (through its len member on 32-bit platforms + and its ptr member on 64-bit platforms or in Python 2.5). On + return, the base member is set to *obj* (or its base if *obj* is + already a buffer object pointing to another object). If you need + to hold on to the memory be sure to INCREF the base member. The + chunk of memory is pointed to by *buf* ->ptr member and has length + *buf* ->len. The flags member of *buf* is :cdata:`NPY_BEHAVED_RO` with + the :cdata:`NPY_WRITEABLE` flag set if *obj* has a writeable buffer + interface. + +.. cfunction:: int PyArray_AxisConverter(PyObject \* obj, int* axis) + + Convert a Python object, *obj*, representing an axis argument to + the proper value for passing to the functions that take an integer + axis. Specifically, if *obj* is None, *axis* is set to + :cdata:`NPY_MAXDIMS` which is interpreted correctly by the C-API + functions that take axis arguments. + +.. cfunction:: int PyArray_BoolConverter(PyObject* obj, Bool* value) + + Convert any Python object, *obj*, to :cdata:`NPY_TRUE` or + :cdata:`NPY_FALSE`, and place the result in *value*. + +.. cfunction:: int PyArray_ByteorderConverter(PyObject* obj, char* endian) + + Convert Python strings into the corresponding byte-order + character: + '>', '<', 's', '=', or '\|'. + +..
cfunction:: int PyArray_SortkindConverter(PyObject* obj, NPY_SORTKIND* sort) + + Convert Python strings into one of :cdata:`NPY_QUICKSORT` (starts + with 'q' or 'Q') , :cdata:`NPY_HEAPSORT` (starts with 'h' or 'H'), + or :cdata:`NPY_MERGESORT` (starts with 'm' or 'M'). + +.. cfunction:: int PyArray_SearchsideConverter(PyObject* obj, NPY_SEARCHSIDE* side) + + Convert Python strings into one of :cdata:`NPY_SEARCHLEFT` (starts with 'l' + or 'L'), or :cdata:`NPY_SEARCHRIGHT` (starts with 'r' or 'R'). + +Other conversions +^^^^^^^^^^^^^^^^^ + +.. cfunction:: int PyArray_PyIntAsInt(PyObject* op) + + Convert all kinds of Python objects (including arrays and array + scalars) to a standard integer. On error, -1 is returned and an + exception set. You may find useful the macro: + + .. code-block:: c + + #define error_converting(x) (((x) == -1) && PyErr_Occurred() + +.. cfunction:: npy_intp PyArray_PyIntAsIntp(PyObject* op) + + Convert all kinds of Python objects (including arrays and array + scalars) to a (platform-pointer-sized) integer. On error, -1 is + returned and an exception set. + +.. cfunction:: int PyArray_IntpFromSequence(PyObject* seq, npy_intp* vals, int maxvals) + + Convert any Python sequence (or single Python number) passed in as + *seq* to (up to) *maxvals* pointer-sized integers and place them + in the *vals* array. The sequence can be smaller then *maxvals* as + the number of converted objects is returned. + +.. cfunction:: int PyArray_TypestrConvert(int itemsize, int gentype) + + Convert typestring characters (with *itemsize*) to basic + enumerated data types. The typestring character corresponding to + signed and unsigned integers, floating point numbers, and + complex-floating point numbers are recognized and converted. Other + values of gentype are returned. This function can be used to + convert, for example, the string'f4' to :cdata:`NPY_FLOAT32`. 
+ + +Miscellaneous +------------- + + +Importing the API +^^^^^^^^^^^^^^^^^ + +In order to make use of the C-API from another extension module, the +``import_array`` () command must be used. If the extension module is +self-contained in a single .c file, then that is all that needs to be +done. If, however, the extension module involves multiple files where +the C-API is needed then some additional steps must be taken. + +.. cfunction:: void import_array(void) + + This function must be called in the initialization section of a + module that will make use of the C-API. It imports the module + where the function-pointer table is stored and points the correct + variable to it. + +.. cmacro:: PY_ARRAY_UNIQUE_SYMBOL + +.. cmacro:: NO_IMPORT_ARRAY + + Using these #defines you can use the C-API in multiple files for a + single extension module. In each file you must define + :cmacro:`PY_ARRAY_UNIQUE_SYMBOL` to some name that will hold the + C-API (*e.g.* myextension_ARRAY_API). This must be done **before** + including the numpy/arrayobject.h file. In the module + intialization routine you call ``import_array`` (). In addition, + in the files that do not have the module initialization + sub_routine define :cmacro:`NO_IMPORT_ARRAY` prior to including + numpy/arrayobject.h. + + Suppose I have two files coolmodule.c and coolhelper.c which need + to be compiled and linked into a single extension module. Suppose + coolmodule.c contains the required initcool module initialization + function (with the import_array() function called). Then, + coolmodule.c would have at the top: + + .. code-block:: c + + #define PY_ARRAY_UNIQUE_SYMBOL cool_ARRAY_API + #include numpy/arrayobject.h + + On the other hand, coolhelper.c would contain at the top: + + .. code-block:: c + + #define PY_ARRAY_UNIQUE_SYMBOL cool_ARRAY_API + #define NO_IMPORT_ARRAY + #include numpy/arrayobject.h + +.. 
cfunction:: unsigned int PyArray_GetNDArrayCVersion(void) + + This just returns the value :cdata:`NPY_VERSION`. Because it is in the + C-API, however, comparing the output of this function from the + value defined in the current header gives a way to test if the + C-API has changed thus requiring a re-compilation of extension + modules that use the C-API. + + +Internal Flexibility +^^^^^^^^^^^^^^^^^^^^ + +.. cfunction:: int PyArray_SetNumericOps(PyObject* dict) + + NumPy stores an internal table of Python callable objects that are + used to implement arithmetic operations for arrays as well as + certain array calculation methods. This function allows the user + to replace any or all of these Python objects with their own + versions. The keys of the dictionary, *dict*, are the named + functions to replace and the paired value is the Python callable + object to use. Care should be taken that the function used to + replace an internal array operation does not itself call back to + that internal array operation (unless you have designed the + function to handle that), or an unchecked infinite recursion can + result (possibly causing program crash). The key names that + represent operations that can be replaced are: + + **add**, **subtract**, **multiply**, **divide**, + **remainder**, **power**, **square**, **reciprocal**, + **ones_like**, **sqrt**, **negative**, **absolute**, + **invert**, **left_shift**, **right_shift**, + **bitwise_and**, **bitwise_xor**, **bitwise_or**, + **less**, **less_equal**, **equal**, **not_equal**, + **greater**, **greater_equal**, **floor_divide**, + **true_divide**, **logical_or**, **logical_and**, + **floor**, **ceil**, **maximum**, **minimum**, **rint**. + + + These functions are included here because they are used at least once + in the array object's methods. The function returns -1 (without + setting a Python Error) if one of the objects being assigned is not + callable. + +.. 
cfunction:: PyObject* PyArray_GetNumericOps(void) + + Return a Python dictionary containing the callable Python objects + stored in the the internal arithmetic operation table. The keys of + this dictionary are given in the explanation for :cfunc:`PyArray_SetNumericOps`. + +.. cfunction:: void PyArray_SetStringFunction(PyObject* op, int repr) + + This function allows you to alter the tp_str and tp_repr methods + of the array object to any Python function. Thus you can alter + what happens for all arrays when str(arr) or repr(arr) is called + from Python. The function to be called is passed in as *op*. If + *repr* is non-zero, then this function will be called in response + to repr(arr), otherwise the function will be called in response to + str(arr). No check on whether or not *op* is callable is + performed. The callable passed in to *op* should expect an array + argument and should return a string to be printed. + + +Memory management +^^^^^^^^^^^^^^^^^ + +.. cfunction:: char* PyDataMem_NEW(size_t nbytes) + +.. cfunction:: PyDataMem_FREE(char* ptr) + +.. cfunction:: char* PyDataMem_RENEW(void * ptr, size_t newbytes) + + Macros to allocate, free, and reallocate memory. These macros are used + internally to create arrays. + +.. cfunction:: npy_intp* PyDimMem_NEW(nd) + +.. cfunction:: PyDimMem_FREE(npy_intp* ptr) + +.. cfunction:: npy_intp* PyDimMem_RENEW(npy_intp* ptr, npy_intp newnd) + + Macros to allocate, free, and reallocate dimension and strides memory. + +.. cfunction:: PyArray_malloc(nbytes) + +.. cfunction:: PyArray_free(ptr) + +.. cfunction:: PyArray_realloc(ptr, nbytes) + + These macros use different memory allocators, depending on the + constant :cdata:`NPY_USE_PYMEM`. The system malloc is used when + :cdata:`NPY_USE_PYMEM` is 0, if :cdata:`NPY_USE_PYMEM` is 1, then + the Python memory allocator is used. 
+ + +Threading support +^^^^^^^^^^^^^^^^^ + +These macros are only meaningful if :cdata:`NPY_ALLOW_THREADS` +evaluates True during compilation of the extension module. Otherwise, +these macros are equivalent to whitespace. Python uses a single Global +Interpreter Lock (GIL) for each Python process so that only a single +thread may excecute at a time (even on multi-cpu machines). When +calling out to a compiled function that may take time to compute (and +does not have side-effects for other threads like updated global +variables), the GIL should be released so that other Python threads +can run while the time-consuming calculations are performed. This can +be accomplished using two groups of macros. Typically, if one macro in +a group is used in a code block, all of them must be used in the same +code block. Currently, :cdata:`NPY_ALLOW_THREADS` is defined to the +python-defined :cdata:`WITH_THREADS` constant unless the environment +variable :cdata:`NPY_NOSMP` is set in which case +:cdata:`NPY_ALLOW_THREADS` is defined to be 0. + +Group 1 +""""""" + + This group is used to call code that may take some time but does not + use any Python C-API calls. Thus, the GIL should be released during + its calculation. + + .. cmacro:: NPY_BEGIN_ALLOW_THREADS + + Equivalent to :cmacro:`Py_BEGIN_ALLOW_THREADS` except it uses + :cdata:`NPY_ALLOW_THREADS` to determine if the macro if + replaced with white-space or not. + + .. cmacro:: NPY_END_ALLOW_THREADS + + Equivalent to :cmacro:`Py_END_ALLOW_THREADS` except it uses + :cdata:`NPY_ALLOW_THREADS` to determine if the macro if + replaced with white-space or not. + + .. cmacro:: NPY_BEGIN_THREADS_DEF + + Place in the variable declaration area. This macro sets up the + variable needed for storing the Python state. + + .. cmacro:: NPY_BEGIN_THREADS + + Place right before code that does not need the Python + interpreter (no Python C-API calls). This macro saves the + Python state and releases the GIL. + + .. 
cmacro:: NPY_END_THREADS + + Place right after code that does not need the Python + interpreter. This macro acquires the GIL and restores the + Python state from the saved variable. + + .. cfunction:: NPY_BEGIN_THREADS_DESCR(PyArray_Descr *dtype) + + Useful to release the GIL only if *dtype* does not contain + arbitrary Python objects which may need the Python interpreter + during execution of the loop. Equivalent to + + .. cfunction:: NPY_END_THREADS_DESCR(PyArray_Descr *dtype) + + Useful to regain the GIL in situations where it was released + using the BEGIN form of this macro. + +Group 2 +""""""" + + This group is used to re-acquire the Python GIL after it has been + released. For example, suppose the GIL has been released (using the + previous calls), and then some path in the code (perhaps in a + different subroutine) requires use of the Python C-API, then these + macros are useful to acquire the GIL. These macros accomplish + essentially a reverse of the previous three (acquire the LOCK saving + what state it had) and then re-release it with the saved state. + + .. cmacro:: NPY_ALLOW_C_API_DEF + + Place in the variable declaration area to set up the necessary + variable. + + .. cmacro:: NPY_ALLOW_C_API + + Place before code that needs to call the Python C-API (when it is + known that the GIL has already been released). + + .. cmacro:: NPY_DISABLE_C_API + + Place after code that needs to call the Python C-API (to re-release + the GIL). + +.. tip:: + + Never use semicolons after the threading support macros. + + +Priority +^^^^^^^^ + +.. cvar:: NPY_PRIOIRTY + + Default priority for arrays. + +.. cvar:: NPY_SUBTYPE_PRIORITY + + Default subtype priority. + +.. cvar:: NPY_SCALAR_PRIORITY + + Default scalar priority (very small) + +.. cfunction:: double PyArray_GetPriority(PyObject* obj, double def) + + Return the :obj:`__array_priority__` attribute (converted to a + double) of *obj* or *def* if no attribute of that name + exists. 
Fast returns that avoid the attribute lookup are provided + for objects of type :cdata:`PyArray_Type`. + + +Default buffers +^^^^^^^^^^^^^^^ + +.. cvar:: NPY_BUFSIZE + + Default size of the user-settable internal buffers. + +.. cvar:: NPY_MIN_BUFSIZE + + Smallest size of user-settable internal buffers. + +.. cvar:: NPY_MAX_BUFSIZE + + Largest size allowed for the user-settable buffers. + + +Other constants +^^^^^^^^^^^^^^^ + +.. cvar:: NPY_NUM_FLOATTYPE + + The number of floating-point types + +.. cvar:: NPY_MAXDIMS + + The maximum number of dimensions allowed in arrays. + +.. cvar:: NPY_VERSION + + The current version of the ndarray object (check to see if this + variable is defined to guarantee the numpy/arrayobject.h header is + being used). + +.. cvar:: NPY_FALSE + + Defined as 0 for use with Bool. + +.. cvar:: NPY_TRUE + + Defined as 1 for use with Bool. + +.. cvar:: NPY_FAIL + + The return value of failed converter functions which are called using + the "O&" syntax in :cfunc:`PyArg_ParseTuple`-like functions. + +.. cvar:: NPY_SUCCEED + + The return value of successful converter functions which are called + using the "O&" syntax in :cfunc:`PyArg_ParseTuple`-like functions. + + +Miscellaneous Macros +^^^^^^^^^^^^^^^^^^^^ + +.. cfunction:: PyArray_SAMESHAPE(a1, a2) + + Evaluates as True if arrays *a1* and *a2* have the same shape. + +.. cfunction:: PyArray_MAX(a,b) + + Returns the maximum of *a* and *b*. If (*a*) or (*b*) are + expressions they are evaluated twice. + +.. cfunction:: PyArray_MIN(a,b) + + Returns the minimum of *a* and *b*. If (*a*) or (*b*) are + expressions they are evaluated twice. + +.. cfunction:: PyArray_CLT(a,b) + +.. cfunction:: PyArray_CGT(a,b) + +.. cfunction:: PyArray_CLE(a,b) + +.. cfunction:: PyArray_CGE(a,b) + +.. cfunction:: PyArray_CEQ(a,b) + +.. 
cfunction:: PyArray_CNE(a,b) + + Implements the complex comparisons between two complex numbers + (structures with a real and imag member) using NumPy's definition + of the ordering which is lexicographic: comparing the real parts + first and then the complex parts if the real parts are equal. + +.. cfunction:: PyArray_REFCOUNT(PyObject* op) + + Returns the reference count of any Python object. + +.. cfunction:: PyArray_XDECREF_ERR(PyObject \*obj) + + DECREF's an array object which may have the :cdata:`NPY_UPDATEIFCOPY` + flag set without causing the contents to be copied back into the + original array. Resets the :cdata:`NPY_WRITEABLE` flag on the base + object. This is useful for recovering from an error condition when + :cdata:`NPY_UPDATEIFCOPY` is used. + + +Enumerated Types +^^^^^^^^^^^^^^^^ + +.. ctype:: NPY_SORTKIND + + A special variable-type which can take on the values :cdata:`NPY_{KIND}` + where ``{KIND}`` is + + **QUICKSORT**, **HEAPSORT**, **MERGESORT** + + .. cvar:: NPY_NSORTS + + Defined to be the number of sorts. + +.. ctype:: NPY_SCALARKIND + + A special variable type indicating the number of "kinds" of + scalars distinguished in determining scalar-coercion rules. This + variable can take on the values :cdata:`NPY_{KIND}` where ``{KIND}`` can be + + **NOSCALAR**, **BOOL_SCALAR**, **INTPOS_SCALAR**, + **INTNEG_SCALAR**, **FLOAT_SCALAR**, **COMPLEX_SCALAR**, + **OBJECT_SCALAR** + + + .. cvar:: NPY_NSCALARKINDS + + Defined to be the number of scalar kinds + (not including :cdata:`NPY_NOSCALAR`). + +.. ctype:: NPY_ORDER + + A variable type indicating the order that an array should be + interpreted in. The value of a variable of this type can be + :cdata:`NPY_{ORDER}` where ``{ORDER}`` is + + **ANYORDER**, **CORDER**, **FORTRANORDER** + +.. ctype:: NPY_CLIPMODE + + A variable type indicating the kind of clipping that should be + applied in certain functions. 
The value of a variable of this type + can be :cdata:`NPY_{MODE}` where ``{MODE}`` is + + **CLIP**, **WRAP**, **RAISE** + +.. index:: + pair: ndarray; C-API diff --git a/doc/source/reference/c-api.config.rst b/doc/source/reference/c-api.config.rst new file mode 100644 index 000000000..30dd1c6fb --- /dev/null +++ b/doc/source/reference/c-api.config.rst @@ -0,0 +1,110 @@ +Configuration defines +===================== + +.. sectionauthor:: Travis E. Oliphant + +When NumPy is built, a configuration file is constructed and placed as +config.h in the NumPy include directory. This configuration file +ensures that specific macros are defined and defines other macros +based on whether or not your system has certain features. It is +included by the arrayobject.h file. + + +Guaranteed to be defined +------------------------ + +The :cdata:`SIZEOF_{CTYPE}` constants are defined so that sizeof +information is available to the pre-processor. + +.. cvar:: CHAR_BIT + + The number of bits of a char. The char is the unit of all sizeof + definitions + +.. cvar:: SIZEOF_SHORT + + sizeof(short) + +.. cvar:: SIZEOF_INT + + sizeof(int) + +.. cvar:: SIZEOF_LONG + + sizeof(long) + +.. cvar:: SIZEOF_LONG_LONG + + sizeof(longlong) where longlong is defined appropriately on the + platform (A macro defines **SIZEOF_LONGLONG** as well.) + +.. cvar:: SIZEOF_PY_LONG_LONG + + +.. cvar:: SIZEOF_FLOAT + + sizeof(float) + +.. cvar:: SIZEOF_DOUBLE + + sizeof(double) + +.. cvar:: SIZEOF_LONG_DOUBLE + + sizeof(longdouble) (A macro defines **SIZEOF_LONGDOUBLE** as well.) + +.. cvar:: SIZEOF_PY_INTPTR_T + + Size of a pointer on this platform (sizeof(void \*)) (A macro defines + SIZEOF_INTP as well.) + + +Possible defines +---------------- + +These defines will cause the compilation to ignore compatibility code +that is placed in NumPy and use the system code instead. If they are +not defined, then the system does not have that capability. + +.. 
cvar:: HAVE_LONGDOUBLE_FUNCS + + System has C99 long double math functions. + +.. cvar:: HAVE_FLOAT_FUNCS + + System has C99 float math functions. + +.. cvar:: HAVE_INVERSE_HYPERBOLIC + + System has inverse hyperbolic functions: asinh, acosh, and atanh. + +.. cvar:: HAVE_INVERSE_HYPERBOLIC_FLOAT + + System has C99 float extensions to inverse hyperbolic functions: + asinhf, acoshf, atanhf + +.. cvar:: HAVE_INVERSE_HYPERBOLIC_LONGDOUBLE + + System has C99 long double extensions to inverse hyperbolic functions: + asinhl, acoshl, atanhl. + +.. cvar:: HAVE_ISNAN + + System has an isnan function. + +.. cvar:: HAVE_ISINF + + System has an isinf function. + +.. cvar:: HAVE_LOG1P + + System has the log1p function: :math:`\log\left(x+1\right)`. + +.. cvar:: HAVE_EXPM1 + + System has the expm1 function: :math:`\exp\left(x\right)-1`. + +.. cvar:: HAVE_RINT + + System has the rint function. + diff --git a/doc/source/reference/c-api.dtype.rst b/doc/source/reference/c-api.dtype.rst new file mode 100644 index 000000000..071b4b629 --- /dev/null +++ b/doc/source/reference/c-api.dtype.rst @@ -0,0 +1,218 @@ +Data Type API +============= + +.. sectionauthor:: Travis E. Oliphant + +The standard array can have 21 different data types (and has some +support for adding your own types). These data types all have an +enumerated type, an enumerated type-character, and a corresponding +array scalar Python type object (placed in a hierarchy). There are +also standard C typedefs to make it easier to manipulate elements of +the given data type. For the numeric types, there are also bit-width +equivalent C typedefs and named typenumbers that make it easier to +select the precision desired. + +.. warning:: + + The names for the types in c code follows c naming conventions + more closely. The Python names for these types follow Python + conventions. Thus, :cdata:`NPY_FLOAT` picks up a 32-bit float in + C, but :class:`numpy.float_` in Python corresponds to a 64-bit + double. 
The bit-width names can be used in both Python and C for + clarity. + + +Enumerated Types +---------------- + +There is a list of enumerated types defined providing the basic 21 +data types plus some useful generic names. Whenever the code requires +a type number, one of these enumerated types is requested. The types +are all called :cdata:`NPY_{NAME}` where ``{NAME}`` can be + + **BOOL**, **BYTE**, **UBYTE**, **SHORT**, **USHORT**, **INT**, + **UINT**, **LONG**, **ULONG**, **LONGLONG**, **ULONGLONG**, + **FLOAT**, **DOUBLE**, **LONGDOUBLE**, **CFLOAT**, **CDOUBLE**, + **CLONGDOUBLE**, **OBJECT**, **STRING**, **UNICODE**, **VOID** + + **NTYPES**, **NOTYPE**, **USERDEF**, **DEFAULT_TYPE** + +The various character codes indicating certain types are also part of +an enumerated list. References to type characters (should they be +needed at all) should always use these enumerations. The form of them +is :cdata:`NPY_{NAME}LTR` where ``{NAME}`` can be + + **BOOL**, **BYTE**, **UBYTE**, **SHORT**, **USHORT**, **INT**, + **UINT**, **LONG**, **ULONG**, **LONGLONG**, **ULONGLONG**, + **FLOAT**, **DOUBLE**, **LONGDOUBLE**, **CFLOAT**, **CDOUBLE**, + **CLONGDOUBLE**, **OBJECT**, **STRING**, **VOID** + + **INTP**, **UINTP** + + **GENBOOL**, **SIGNED**, **UNSIGNED**, **FLOATING**, **COMPLEX** + +The latter group of ``{NAME}s`` corresponds to letters used in the array +interface typestring specification. + + +Defines +------- + +Max and min values for integers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. cvar:: NPY_MAX_INT{bits} + +.. cvar:: NPY_MAX_UINT{bits} + +.. cvar:: NPY_MIN_INT{bits} + + These are defined for ``{bits}`` = 8, 16, 32, 64, 128, and 256 and provide + the maximum (minimum) value of the corresponding (unsigned) integer + type. Note: the actual integer type may not be available on all + platforms (i.e. 128-bit and 256-bit integers are rare). + +.. 
cvar:: NPY_MIN_{type} + + This is defined for ``{type}`` = **BYTE**, **SHORT**, **INT**, + **LONG**, **LONGLONG**, **INTP** + +.. cvar:: NPY_MAX_{type} + + This is defined for all defined for ``{type}`` = **BYTE**, **UBYTE**, + **SHORT**, **USHORT**, **INT**, **UINT**, **LONG**, **ULONG**, + **LONGLONG**, **ULONGLONG**, **INTP**, **UINTP** + + +Number of bits in data types +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +All :cdata:`NPY_SIZEOF_{CTYPE}` constants have corresponding +:cdata:`NPY_BITSOF_{CTYPE}` constants defined. The :cdata:`NPY_BITSOF_{CTYPE}` +constants provide the number of bits in the data type. Specifically, +the available ``{CTYPE}s`` are + + **BOOL**, **CHAR**, **SHORT**, **INT**, **LONG**, + **LONGLONG**, **FLOAT**, **DOUBLE**, **LONGDOUBLE** + + +Bit-width references to enumerated typenums +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +All of the numeric data types (integer, floating point, and complex) +have constants that are defined to be a specific enumerated type +number. Exactly which enumerated type a bit-width type refers to is +platform dependent. In particular, the constants available are +:cdata:`PyArray_{NAME}{BITS}` where ``{NAME}`` is **INT**, **UINT**, +**FLOAT**, **COMPLEX** and ``{BITS}`` can be 8, 16, 32, 64, 80, 96, 128, +160, 192, 256, and 512. Obviously not all bit-widths are available on +all platforms for all the kinds of numeric types. Commonly 8-, 16-, +32-, 64-bit integers; 32-, 64-bit floats; and 64-, 128-bit complex +types are available. + + +Integer that can hold a pointer +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The constants **PyArray_INTP** and **PyArray_UINTP** refer to an +enumerated integer type that is large enough to hold a pointer on the +platform. Index arrays should always be converted to **PyArray_INTP** +, because the dimension of the array is of type npy_intp. + + +C-type names +------------ + +There are standard variable types for each of the numeric data types +and the bool data type. 
Some of these are already available in the +C-specification. You can create variables in extension code with these +types. + + +Boolean +^^^^^^^ + +.. ctype:: npy_bool + + unsigned char; The constants :cdata:`NPY_FALSE` and + :cdata:`NPY_TRUE` are also defined. + + +(Un)Signed Integer +^^^^^^^^^^^^^^^^^^ + +Unsigned versions of the integers can be defined by pre-pending a 'u' +to the front of the integer name. + +.. ctype:: npy_(u)byte + + (unsigned) char + +.. ctype:: npy_(u)short + + (unsigned) short + +.. ctype:: npy_(u)int + + (unsigned) int + +.. ctype:: npy_(u)long + + (unsigned) long int + +.. ctype:: npy_(u)longlong + + (unsigned long long int) + +.. ctype:: npy_(u)intp + + (unsigned) Py_intptr_t (an integer that is the size of a pointer on + the platform). + + +(Complex) Floating point +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. ctype:: npy_(c)float + + float + +.. ctype:: npy_(c)double + + double + +.. ctype:: npy_(c)longdouble + + long double + +complex types are structures with **.real** and **.imag** members (in +that order). + + +Bit-width names +^^^^^^^^^^^^^^^ + +There are also typedefs for signed integers, unsigned integers, +floating point, and complex floating point types of specific bit- +widths. The available type names are + + :ctype:`npy_int{bits}`, :ctype:`npy_uint{bits}`, :ctype:`npy_float{bits}`, + and :ctype:`npy_complex{bits}` + +where ``{bits}`` is the number of bits in the type and can be **8**, +**16**, **32**, **64**, 128, and 256 for integer types; 16, **32** +, **64**, 80, 96, 128, and 256 for floating-point types; and 32, +**64**, **128**, 160, 192, and 512 for complex-valued types. Which +bit-widths are available is platform dependent. The bolded bit-widths +are usually available on all platforms. + + +Printf Formatting +----------------- + +For help in printing, the following strings are defined as the correct +format specifier in printf and related commands. 
+ + :cdata:`NPY_LONGLONG_FMT`, :cdata:`NPY_ULONGLONG_FMT`, + :cdata:`NPY_INTP_FMT`, :cdata:`NPY_UINTP_FMT`, + :cdata:`NPY_LONGDOUBLE_FMT` diff --git a/doc/source/reference/c-api.rst b/doc/source/reference/c-api.rst new file mode 100644 index 000000000..27420a58c --- /dev/null +++ b/doc/source/reference/c-api.rst @@ -0,0 +1,47 @@ +.. _c-api: + +########### +Numpy C-API +########### + +.. sectionauthor:: Travis E. Oliphant + +| Beware of the man who won't be bothered with details. +| --- *William Feather, Sr.* + +| The truth is out there. +| --- *Chris Carter, The X Files* + + +NumPy provides a C-API to enable users to extend the system and get +access to the array object for use in other routines. The best way to +truly understand the C-API is to read the source code. If you are +unfamiliar with (C) source code, however, this can be a daunting +experience at first. Be assured that the task becomes easier with +practice, and you may be surprised at how simple the C-code can be to +understand. Even if you don't think you can write C-code from scratch, +it is much easier to understand and modify already-written source code +then create it *de novo*. + +Python extensions are especially straightforward to understand because +they all have a very similar structure. Admittedly, NumPy is not a +trivial extension to Python, and may take a little more snooping to +grasp. This is especially true because of the code-generation +techniques, which simplify maintenance of very similar code, but can +make the code a little less readable to beginners. Still, with a +little persistence, the code can be opened to your understanding. It +is my hope, that this guide to the C-API can assist in the process of +becoming familiar with the compiled-level work that can be done with +NumPy in order to squeeze that last bit of necessary speed out of your +code. + +.. currentmodule:: numpy-c-api + +.. 
toctree:: + :maxdepth: 2 + + c-api.types-and-structures + c-api.config + c-api.dtype + c-api.array + c-api.ufunc diff --git a/doc/source/reference/c-api.types-and-structures.rst b/doc/source/reference/c-api.types-and-structures.rst new file mode 100644 index 000000000..82b529663 --- /dev/null +++ b/doc/source/reference/c-api.types-and-structures.rst @@ -0,0 +1,1173 @@ +***************************** +Python Types and C-Structures +***************************** + +.. sectionauthor:: Travis E. Oliphant + +Several new types are defined in the C-code. Most of these are +accessible from Python, but a few are not exposed due to their limited +use. Every new Python type has an associated :ctype:`PyObject *` with an +internal structure that includes a pointer to a "method table" that +defines how the new object behaves in Python. When you receive a +Python object into C code, you always get a pointer to a +:ctype:`PyObject` structure. Because a :ctype:`PyObject` structure is +very generic and defines only :cmacro:`PyObject_HEAD`, by itself it +is not very interesting. However, different objects contain more +details after the :cmacro:`PyObject_HEAD` (but you have to cast to the +correct type to access them --- or use accessor functions or macros). + + +New Python Types Defined +======================== + +Python types are the functional equivalent in C of classes in Python. +By constructing a new Python type you make available a new object for +Python. The ndarray object is an example of a new type defined in C. +New types are defined in C by two basic steps: + +1. creating a C-structure (usually named :ctype:`Py{Name}Object`) that is + binary- compatible with the :ctype:`PyObject` structure itself but holds + the additional information needed for that particular object; + +2. populating the :ctype:`PyTypeObject` table (pointed to by the ob_type + member of the :ctype:`PyObject` structure) with pointers to functions + that implement the desired behavior for the type. 
+ +Instead of special method names which define behavior for Python +classes, there are "function tables" which point to functions that +implement the desired results. Since Python 2.2, the PyTypeObject +itself has become dynamic which allows C types that can be "sub-typed +"from other C-types in C, and sub-classed in Python. The children +types inherit the attributes and methods from their parent(s). + +There are two major new types: the ndarray ( :cdata:`PyArray_Type` ) +and the ufunc ( :cdata:`PyUFunc_Type` ). Additional types play a +supportive role: the :cdata:`PyArrayIter_Type`, the +:cdata:`PyArrayMultiIter_Type`, and the :cdata:`PyArrayDescr_Type` +. The :cdata:`PyArrayIter_Type` is the type for a flat iterator for an +ndarray (the object that is returned when getting the flat +attribute). The :cdata:`PyArrayMultiIter_Type` is the type of the +object returned when calling ``broadcast`` (). It handles iteration +and broadcasting over a collection of nested sequences. Also, the +:cdata:`PyArrayDescr_Type` is the data-type-descriptor type whose +instances describe the data. Finally, there are 21 new scalar-array +types which are new Python scalars corresponding to each of the +fundamental data types available for arrays. An additional 10 other +types are place holders that allow the array scalars to fit into a +hierarchy of actual Python types. + + +PyArray_Type +------------ + +.. cvar:: PyArray_Type + + The Python type of the ndarray is :cdata:`PyArray_Type`. In C, every + ndarray is a pointer to a :ctype:`PyArrayObject` structure. The ob_type + member of this structure contains a pointer to the :cdata:`PyArray_Type` + typeobject. + +.. ctype:: PyArrayObject + + The :ctype:`PyArrayObject` C-structure contains all of the required + information for an array. All instances of an ndarray (and its + subclasses) will have this structure. For future compatibility, + these structure members should normally be accessed using the + provided macros. 
If you need a shorter name, then you can make use + of :ctype:`NPY_AO` which is defined to be equivalent to + :ctype:`PyArrayObject`. + + .. code-block:: c + + typedef struct PyArrayObject { + PyObject_HEAD + char *data; + int nd; + npy_intp *dimensions; + npy_intp *strides; + PyObject *base; + PyArray_Descr *descr; + int flags; + PyObject *weakreflist; + } PyArrayObject; + +.. cmacro:: PyArrayObject.PyObject_HEAD + + This is needed by all Python objects. It consists of (at least) + a reference count member ( ``ob_refcnt`` ) and a pointer to the + typeobject ( ``ob_type`` ). (Other elements may also be present + if Python was compiled with special options see + Include/object.h in the Python source tree for more + information). The ob_type member points to a Python type + object. + +.. cmember:: char *PyArrayObject.data + + A pointer to the first element of the array. This pointer can + (and normally should) be recast to the data type of the array. + +.. cmember:: int PyArrayObject.nd + + An integer providing the number of dimensions for this + array. When nd is 0, the array is sometimes called a rank-0 + array. Such arrays have undefined dimensions and strides and + cannot be accessed. :cdata:`NPY_MAXDIMS` is the largest number of + dimensions for any array. + +.. cmember:: npy_intp PyArrayObject.dimensions + + An array of integers providing the shape in each dimension as + long as nd :math:`\geq` 1. The integer is always large enough + to hold a pointer on the platform, so the dimension size is + only limited by memory. + +.. cmember:: npy_intp *PyArrayObject.strides + + An array of integers providing for each dimension the number of + bytes that must be skipped to get to the next element in that + dimension. + +.. cmember:: PyObject *PyArrayObject.base + + This member is used to hold a pointer to another Python object + that is related to this array. 
There are two use cases: 1) If + this array does not own its own memory, then base points to the + Python object that owns it (perhaps another array object), 2) + If this array has the :cdata:`NPY_UPDATEIFCOPY` flag set, then this + array is a working copy of a "misbehaved" array. As soon as + this array is deleted, the array pointed to by base will be + updated with the contents of this array. + +.. cmember:: PyArray_Descr *PyArrayObject.descr + + A pointer to a data-type descriptor object (see below). The + data-type descriptor object is an instance of a new built-in + type which allows a generic description of memory. There is a + descriptor structure for each data type supported. This + descriptor structure contains useful information about the type + as well as a pointer to a table of function pointers to + implement specific functionality. + +.. cmember:: int PyArrayObject.flags + + Flags indicating how the memory pointed to by data is to be + interpreted. Possible flags are :cdata:`NPY_C_CONTIGUOUS`, + :cdata:`NPY_F_CONTIGUOUS`, :cdata:`NPY_OWNDATA`, :cdata:`NPY_ALIGNED`, + :cdata:`NPY_WRITEABLE`, and :cdata:`NPY_UPDATEIFCOPY`. + +.. cmember:: PyObject *PyArrayObject.weakreflist + + This member allows array objects to have weak references (using the + weakref module). + + +PyArrayDescr_Type +----------------- + +.. cvar:: PyArrayDescr_Type + + The :cdata:`PyArrayDescr_Type` is the built-in type of the + data-type-descriptor objects used to describe how the bytes comprising + the array are to be interpreted. There are 21 statically-defined + :ctype:`PyArray_Descr` objects for the built-in data-types. While these + participate in reference counting, their reference count should never + reach zero. There is also a dynamic table of user-defined + :ctype:`PyArray_Descr` objects that is also maintained. Once a + data-type-descriptor object is "registered" it should never be + deallocated either. The function :cfunc:`PyArray_DescrFromType` (...) 
+    unicode string, and 'V' represents arbitrary.
Each bit in this member is a flag which are named + as: + + .. cvar:: NPY_ITEM_REFCOUNT + + .. cvar:: NPY_ITEM_HASOBJECT + + Indicates that items of this data-type must be reference + counted (using :cfunc:`Py_INCREF` and :cfunc:`Py_DECREF` ). + + .. cvar:: NPY_ITEM_LISTPICKLE + + Indicates arrays of this data-type must be converted to a list + before pickling. + + .. cvar:: NPY_ITEM_IS_POINTER + + Indicates the item is a pointer to some other data-type + + .. cvar:: NPY_NEEDS_INIT + + Indicates memory for this data-type must be initialized (set + to 0) on creation. + + .. cvar:: NPY_NEEDS_PYAPI + + Indicates this data-type requires the Python C-API during + access (so don't give up the GIL if array access is going to + be needed). + + .. cvar:: NPY_USE_GETITEM + + On array access use the ``f->getitem`` function pointer + instead of the standard conversion to an array scalar. Must + use if you don't define an array scalar to go along with + the data-type. + + .. cvar:: NPY_USE_SETITEM + + When creating a 0-d array from an array scalar use + ``f->setitem`` instead of the standard copy from an array + scalar. Must use if you don't define an array scalar to go + along with the data-type. + + .. cvar:: NPY_FROM_FIELDS + + The bits that are inherited for the parent data-type if these + bits are set in any field of the data-type. Currently ( + :cdata:`NPY_NEEDS_INIT` \| :cdata:`NPY_LIST_PICKLE` \| + :cdata:`NPY_ITEM_REFCOUNT` \| :cdata:`NPY_NEEDS_PYAPI` ). + + .. cvar:: NPY_OBJECT_DTYPE_FLAGS + + Bits set for the object data-type: ( :cdata:`NPY_LIST_PICKLE` + \| :cdata:`NPY_USE_GETITEM` \| :cdata:`NPY_ITEM_IS_POINTER` \| + :cdata:`NPY_REFCOUNT` \| :cdata:`NPY_NEEDS_INIT` \| + :cdata:`NPY_NEEDS_PYAPI`). + + .. cfunction:: PyDataType_FLAGCHK(PyArray_Descr *dtype, int flags) + + Return true if all the given flags are set for the data-type + object. + + .. 
cfunction:: PyDataType_REFCHK(PyArray_Descr *dtype) + + Equivalent to :cfunc:`PyDataType_FLAGCHK` (*dtype*, + :cdata:`NPY_ITEM_REFCOUNT`). + +.. cmember:: int PyArray_Descr.type_num + + A number that uniquely identifies the data type. For new data-types, + this number is assigned when the data-type is registered. + +.. cmember:: int PyArray_Descr.elsize + + For data types that are always the same size (such as long), this + holds the size of the data type. For flexible data types where + different arrays can have a different elementsize, this should be + 0. + +.. cmember:: int PyArray_Descr.alignment + + A number providing alignment information for this data type. + Specifically, it shows how far from the start of a 2-element + structure (whose first element is a ``char`` ), the compiler + places an item of this type: ``offsetof(struct {char c; type v;}, + v)`` + +.. cmember:: PyArray_ArrayDescr *PyArray_Descr.subarray + + If this is non- ``NULL``, then this data-type descriptor is a + C-style contiguous array of another data-type descriptor. In + other-words, each element that this descriptor describes is + actually an array of some other base descriptor. This is most + useful as the data-type descriptor for a field in another + data-type descriptor. The fields member should be ``NULL`` if this + is non- ``NULL`` (the fields member of the base descriptor can be + non- ``NULL`` however). The :ctype:`PyArray_ArrayDescr` structure is + defined using + + .. code-block:: c + + typedef struct { + PyArray_Descr *base; + PyObject *shape; + } PyArray_ArrayDescr; + + The elements of this structure are: + + .. cmember:: PyArray_Descr *PyArray_ArrayDescr.base + + The data-type-descriptor object of the base-type. + + .. cmember:: PyObject *PyArray_ArrayDescr.shape + + The shape (always C-style contiguous) of the sub-array as a Python + tuple. + + +.. 
cmember:: PyObject *PyArray_Descr.fields + + If this is non-NULL, then this data-type-descriptor has fields + described by a Python dictionary whose keys are names (and also + titles if given) and whose values are tuples that describe the + fields. Recall that a data-type-descriptor always describes a + fixed-length set of bytes. A field is a named sub-region of that + total, fixed-length collection. A field is described by a tuple + composed of another data- type-descriptor and a byte + offset. Optionally, the tuple may contain a title which is + normally a Python string. These tuples are placed in this + dictionary keyed by name (and also title if given). + +.. cmember:: PyArray_ArrFuncs *PyArray_Descr.f + + A pointer to a structure containing functions that the type needs + to implement internal features. These functions are not the same + thing as the universal functions (ufuncs) described later. Their + signatures can vary arbitrarily. + +.. ctype:: PyArray_ArrFuncs + + Functions implementing internal features. Not all of these + function pointers must be defined for a given type. The required + members are ``nonzero``, ``copyswap``, ``copyswapn``, ``setitem``, + ``getitem``, and ``cast``. These are assumed to be non- ``NULL`` + and ``NULL`` entries will cause a program crash. The other + functions may be ``NULL`` which will just mean reduced + functionality for that data-type. (Also, the nonzero function will + be filled in with a default function if it is ``NULL`` when you + register a user-defined data-type). + + .. 
+          int listpickle;
+          function requires behaved arrays. The return value is 1 if
+          ``*d1`` > ``*d2``, 0 if ``*d1`` == ``*d2``, and -1 if
+          ``*d1`` < ``*d2``. The array object arr is used to retrieve
The return value is + always 0. The index of the largest element is returned in + ``max_ind``. + + .. cmember:: void dotfunc(void* ip1, npy_intp is1, void* ip2, npy_intp is2, void* op, npy_intp n, void* arr) + + A pointer to a function that multiplies two ``n`` -length + sequences together, adds them, and places the result in + element pointed to by ``op`` of ``arr``. The start of the two + sequences are pointed to by ``ip1`` and ``ip2``. To get to + the next element in each sequence requires a jump of ``is1`` + and ``is2`` *bytes*, respectively. This function requires + behaved (though not necessarily contiguous) memory. + + .. cmember:: int scanfunc(FILE* fd, void* ip , void* sep , void* arr) + + A pointer to a function that scans (scanf style) one element + of the corresponding type from the file descriptor ``fd`` into + the array memory pointed to by ``ip``. The array is assumed + to be behaved. If ``sep`` is not NULL, then a separator string + is also scanned from the file before returning. The last + argument ``arr`` is the array to be scanned into. A 0 is + returned if the scan is successful. A negative number + indicates something went wrong: -1 means the end of file was + reached before the separator string could be scanned, -4 means + that the end of file was reached before the element could be + scanned, and -3 means that the element could not be + interpreted from the format string. Requires a behaved array. + + .. cmember:: int fromstr(char* str, void* ip, char** endptr, void* arr) + + A pointer to a function that converts the string pointed to by + ``str`` to one element of the corresponding type and places it + in the memory location pointed to by ``ip``. After the + conversion is completed, ``*endptr`` points to the rest of the + string. The last argument ``arr`` is the array into which ip + points (needed for variable-size data- types). Returns 0 on + success or -1 on failure. Requires a behaved array. + + .. 
+          key (so far :cdata:`PyArray_QUICKSORT`, :cdata:`PyArray_HEAPSORT`, and
+          :cdata:`PyArray_NOTYPE` ) indicating data-types that this data-type
It is a very simple type that implements only + basic getattribute behavior, printing behavior, and has call + behavior which allows these objects to act like functions. The + basic idea behind the ufunc is to hold a reference to fast + 1-dimensional (vector) loops for each data type that supports the + operation. These one-dimensional loops all have the same signature + and are the key to creating a new ufunc. They are called by the + generic looping code as appropriate to implement the N-dimensional + function. There are also some generic 1-d loops defined for + floating and complexfloating arrays that allow you to define a + ufunc using a single scalar function (*e.g.* atanh). + + +.. ctype:: PyUFuncObject + + The core of the ufunc is the :ctype:`PyUFuncObject` which contains all + the information needed to call the underlying C-code loops that + perform the actual work. It has the following structure: + + .. code-block:: c + + typedef struct { + PyObject_HEAD + int nin; + int nout; + int nargs; + int identity; + PyUFuncGenericFunction *functions; + void **data; + int ntypes; + int check_return; + char *name; + char *types; + char *doc; + void *ptr; + PyObject *obj; + PyObject *userloops; + } PyUFuncObject; + + .. cmacro:: PyUFuncObject.PyObject_HEAD + + required for all Python objects. + + .. cmember:: int PyUFuncObject.nin + + The number of input arguments. + + .. cmember:: int PyUFuncObject.nout + + The number of output arguments. + + .. cmember:: int PyUFuncObject.nargs + + The total number of arguments (*nin* + *nout*). This must be + less than :cdata:`NPY_MAXARGS`. + + .. cmember:: int PyUFuncObject.identity + + Either :cdata:`PyUFunc_One`, :cdata:`PyUFunc_Zero`, or :cdata:`PyUFunc_None` + to indicate the identity for this operation. It is only used + for a reduce-like call on an empty array. + + .. 
cmember:: void PyUFuncObject.functions(char** args, npy_intp* dims, npy_intp* steps, void* extradata) + + An array of function pointers --- one for each data type + supported by the ufunc. This is the vector loop that is called + to implement the underlying function *dims* [0] times. The + first argument, *args*, is an array of *nargs* pointers to + behaved memory. Pointers to the data for the input arguments + are first, followed by the pointers to the data for the output + arguments. How many bytes must be skipped to get to the next + element in the sequence is specified by the corresponding entry + in the *steps* array. The last argument allows the loop to + receive extra information. This is commonly used so that a + single, generic vector loop can be used for multiple + functions. In this case, the actual scalar function to call is + passed in as *extradata*. The size of this function pointer + array is ntypes. + + .. cmember:: void **PyUFuncObject.data + + Extra data to be passed to the 1-d vector loops or ``NULL`` if + no extra-data is needed. This C-array must be the same size ( + *i.e.* ntypes) as the functions array. ``NULL`` is used if + extra_data is not needed. Several C-API calls for UFuncs are + just 1-d vector loops that make use of this extra data to + receive a pointer to the actual function to call. + + .. cmember:: int PyUFuncObject.ntypes + + The number of supported data types for the ufunc. This number + specifies how many different 1-d loops (of the builtin data types) are + available. + + .. cmember:: int PyUFuncObject.check_return + + Obsolete and unused. However, it is set by the corresponding entry in + the main ufunc creation routine: :cfunc:`PyUFunc_FromFuncAndData` (...). + + .. cmember:: char *PyUFuncObject.name + + A string name for the ufunc. This is used dynamically to build + the __doc\__ attribute of ufuncs. + + .. 
cmember:: char *PyUFuncObject.types + + An array of *nargs* :math:`\times` *ntypes* 8-bit type_numbers + which contains the type signature for the function for each of + the supported (builtin) data types. For each of the *ntypes* + functions, the corresponding set of type numbers in this array + shows how the *args* argument should be interpreted in the 1-d + vector loop. These type numbers do not have to be the same type + and mixed-type ufuncs are supported. + + .. cmember:: char *PyUFuncObject.doc + + Documentation for the ufunc. Should not contain the function + signature as this is generated dynamically when __doc\__ is + retrieved. + + .. cmember:: void *PyUFuncObject.ptr + + Any dynamically allocated memory. Currently, this is used for dynamic + ufuncs created from a python function to store room for the types, + data, and name members. + + .. cmember:: PyObject *PyUFuncObject.obj + + For ufuncs dynamically created from python functions, this member + holds a reference to the underlying Python function. + + .. cmember:: PyObject *PyUFuncObject.userloops + + A dictionary of user-defined 1-d vector loops (stored as CObject ptrs) + for user-defined types. A loop may be registered by the user for any + user-defined type. It is retrieved by type number. User defined type + numbers are always larger than :cdata:`NPY_USERDEF`. + + +PyArrayIter_Type +---------------- + +.. cvar:: PyArrayIter_Type + + This is an iterator object that makes it easy to loop over an N-dimensional + array. It is the object returned from the flat attribute of an + ndarray. It is also used extensively throughout the implementation + internals to loop over an N-dimensional array. The tp_as_mapping + interface is implemented so that the iterator object can be indexed + (using 1-d indexing), and a few methods are implemented through the + tp_methods table. This object implements the next method and can be + used anywhere an iterator can be used in Python. + +.. 
ctype:: PyArrayIterObject + + The C-structure corresponding to an object of :cdata:`PyArrayIter_Type` is + the :ctype:`PyArrayIterObject`. The :ctype:`PyArrayIterObject` is used to keep + track of a pointer into an N-dimensional array. It contains associated + information used to quickly march through the array. The pointer can + be adjusted in three basic ways: 1) advance to the "next" position in + the array in a C-style contiguous fashion, 2) advance to an arbitrary + N-dimensional coordinate in the array, and 3) advance to an arbitrary + one-dimensional index into the array. The members of the + :ctype:`PyArrayIterObject` structure are used in these + calculations. Iterator objects keep their own dimension and strides + information about an array. This can be adjusted as needed for + "broadcasting," or to loop over only specific dimensions. + + .. code-block:: c + + typedef struct { + PyObject_HEAD + int nd_m1; + npy_intp index; + npy_intp size; + npy_intp coordinates[NPY_MAXDIMS]; + npy_intp dims_m1[NPY_MAXDIMS]; + npy_intp strides[NPY_MAXDIMS]; + npy_intp backstrides[NPY_MAXDIMS]; + npy_intp factors[NPY_MAXDIMS]; + PyArrayObject *ao; + char *dataptr; + Bool contiguous; + } PyArrayIterObject; + + .. cmember:: int PyArrayIterObject.nd_m1 + + :math:`N-1` where :math:`N` is the number of dimensions in the + underlying array. + + .. cmember:: npy_intp PyArrayIterObject.index + + The current 1-d index into the array. + + .. cmember:: npy_intp PyArrayIterObject.size + + The total size of the underlying array. + + .. cmember:: npy_intp *PyArrayIterObject.coordinates + + An :math:`N` -dimensional index into the array. + + .. cmember:: npy_intp *PyArrayIterObject.dims_m1 + + The size of the array minus 1 in each dimension. + + .. cmember:: npy_intp *PyArrayIterObject.strides + + The strides of the array. How many bytes needed to jump to the next + element in each dimension. + + .. 
+        to its beginning. Note that *backstrides* [k] = *strides* [k] \*
+        *dims_m1* [k], but it is stored here as an optimization.
Broadcasting is performed by adjusting array iterators so + that each iterator represents the broadcasted shape and size, but + has its strides adjusted so that the correct element from the array + is used at each iteration. + + +.. ctype:: PyArrayMultiIterObject + + .. code-block:: c + + typedef struct { + PyObject_HEAD + int numiter; + npy_intp size; + npy_intp index; + int nd; + npy_intp dimensions[NPY_MAXDIMS]; + PyArrayIterObject *iters[NPY_MAXDIMS]; + } PyArrayMultiIterObject; + + .. cmacro:: PyArrayMultiIterObject.PyObject_HEAD + + Needed at the start of every Python object (holds reference count and + type identification). + + .. cmember:: int PyArrayMultiIterObject.numiter + + The number of arrays that need to be broadcast to the same shape. + + .. cmember:: npy_intp PyArrayMultiIterObject.size + + The total broadcasted size. + + .. cmember:: npy_intp PyArrayMultiIterObject.index + + The current (1-d) index into the broadcasted result. + + .. cmember:: int PyArrayMultiIterObject.nd + + The number of dimensions in the broadcasted result. + + .. cmember:: npy_intp *PyArrayMultiIterObject.dimensions + + The shape of the broadcasted result (only ``nd`` slots are used). + + .. cmember:: PyArrayIterObject **PyArrayMultiIterObject.iters + + An array of iterator objects that holds the iterators for the arrays + to be broadcast together. On return, the iterators are adjusted for + broadcasting. + + +PyArrayFlags_Type +----------------- + +.. cvar:: PyArrayFlags_Type + + When the flags attribute is retrieved from Python, a special + builtin object of this type is constructed. This special type makes + it easier to work with the different flags by accessing them as + attributes or by accessing them as if the object were a dictionary + with the flag names as entries. 
+that can be present in the array. Most of these are simple wrappers
cmember:: int PyArray_Dims.len + + The length of the list of integers. It is assumed safe to + access *ptr* [0] to *ptr* [len-1]. + + +PyArray_Chunk +------------- + +.. ctype:: PyArray_Chunk + + This is equivalent to the buffer object structure in Python up to + the ptr member. On 32-bit platforms (*i.e.* if :cdata:`NPY_SIZEOF_INT` + == :cdata:`NPY_SIZEOF_INTP` ) or in Python 2.5, the len member also + matches an equivalent member of the buffer object. It is useful to + represent a generic single- segment chunk of memory. + + .. code-block:: c + + typedef struct { + PyObject_HEAD + PyObject *base; + void *ptr; + npy_intp len; + int flags; + } PyArray_Chunk; + + The members are + + .. cmacro:: PyArray_Chunk.PyObject_HEAD + + Necessary for all Python objects. Included here so that the + :ctype:`PyArray_Chunk` structure matches that of the buffer object + (at least to the len member). + + .. cmember:: PyObject *PyArray_Chunk.base + + The Python object this chunk of memory comes from. Needed so that + memory can be accounted for properly. + + .. cmember:: void *PyArray_Chunk.ptr + + A pointer to the start of the single-segment chunk of memory. + + .. cmember:: npy_intp PyArray_Chunk.len + + The length of the segment in bytes. + + .. cmember:: int PyArray_Chunk.flags + + Any data flags (*e.g.* :cdata:`NPY_WRITEABLE` ) that should be used + to interpret the memory. + + +PyArrayInterface +---------------- + +.. seealso:: :ref:`arrays.interface` + +.. ctype:: PyArrayInterface + + The :ctype:`PyArrayInterface` structure is defined so that NumPy and + other extension modules can use the rapid array interface + protocol. The :obj:`__array_struct__` method of an object that + supports the rapid array interface protocol should return a + :ctype:`PyCObject` that contains a pointer to a :ctype:`PyArrayInterface` + structure with the relevant details of the array. 
After the new + array is created, the attribute should be ``DECREF``'d which will + free the :ctype:`PyArrayInterface` structure. Remember to ``INCREF`` the + object (whose :obj:`__array_struct__` attribute was retrieved) and + point the base member of the new :ctype:`PyArrayObject` to this same + object. In this way the memory for the array will be managed + correctly. + + .. code-block:: c + + typedef struct { + int two; + int nd; + char typekind; + int itemsize; + int flags; + npy_intp *shape; + npy_intp *strides; + void *data; + PyObject *descr; + } PyArrayInterface; + + .. cmember:: int PyArrayInterface.two + + the integer 2 as a sanity check. + + .. cmember:: int PyArrayInterface.nd + + the number of dimensions in the array. + + .. cmember:: char PyArrayInterface.typekind + + A character indicating what kind of array is present according to the + typestring convention with 't' -> bitfield, 'b' -> Boolean, 'i' -> + signed integer, 'u' -> unsigned integer, 'f' -> floating point, 'c' -> + complex floating point, 'O' -> object, 'S' -> string, 'U' -> unicode, + 'V' -> void. + + .. cmember:: int PyArrayInterface.itemsize + + The number of bytes each item in the array requires. + + .. cmember:: int PyArrayInterface.flags + + Any of the bits :cdata:`NPY_C_CONTIGUOUS` (1), + :cdata:`NPY_F_CONTIGUOUS` (2), :cdata:`NPY_ALIGNED` (0x100), + :cdata:`NPY_NOTSWAPPED` (0x200), or :cdata:`NPY_WRITEABLE` + (0x400) to indicate something about the data. The + :cdata:`NPY_ALIGNED`, :cdata:`NPY_C_CONTIGUOUS`, and + :cdata:`NPY_F_CONTIGUOUS` flags can actually be determined from + the other parameters. The flag :cdata:`NPY_ARR_HAS_DESCR` + (0x800) can also be set to indicate to objects consuming the + version 3 array interface that the descr member of the + structure is present (it will be ignored by objects consuming + version 2 of the array interface). + + .. cmember:: npy_intp *PyArrayInterface.shape + + An array containing the size of the array in each dimension. + + .. 
cmember:: npy_intp *PyArrayInterface.strides + + An array containing the number of bytes to jump to get to the next + element in each dimension. + + .. cmember:: void *PyArrayInterface.data + + A pointer *to* the first element of the array. + + .. cmember:: PyObject *PyArrayInterface.descr + + A Python object describing the data-type in more detail (same + as the *descr* key in :obj:`__array_interface__`). This can be + ``NULL`` if *typekind* and *itemsize* provide enough + information. This field is also ignored unless + :cdata:`ARR_HAS_DESCR` flag is on in *flags*. + + +Internally used structures +-------------------------- + +Internally, the code uses some additional Python objects primarily for +memory management. These types are not accessible directly from +Python, and are not exposed to the C-API. They are included here only +for completeness and assistance in understanding the code. + + +.. ctype:: PyUFuncLoopObject + + A loose wrapper for a C-structure that contains the information + needed for looping. This is useful if you are trying to understand + the ufunc looping code. The :ctype:`PyUFuncLoopObject` is the associated + C-structure. It is defined in the ``ufuncobject.h`` header. + +.. ctype:: PyUFuncReduceObject + + A loose wrapper for the C-structure that contains the information + needed for reduce-like methods of ufuncs. This is useful if you are + trying to understand the reduce, accumulate, and reduce-at + code. The :ctype:`PyUFuncReduceObject` is the associated C-structure. It + is defined in the ``ufuncobject.h`` header. + +.. ctype:: PyUFunc_Loop1d + + A simple linked-list of C-structures containing the information needed + to define a 1-d loop for a ufunc for every defined signature of a + user-defined data-type. + +.. cvar:: PyArrayMapIter_Type + + Advanced indexing is handled with this Python type. It is simply a + loose wrapper around the C-structure containing the variables + needed for advanced array indexing. 
The associated C-structure, + :ctype:`PyArrayMapIterObject`, is useful if you are trying to + understand the advanced-index mapping code. It is defined in the + ``arrayobject.h`` header. This type is not exposed to Python and + could be replaced with a C-structure. As a Python type it takes + advantage of reference- counted memory management. + diff --git a/doc/source/reference/c-api.ufunc.rst b/doc/source/reference/c-api.ufunc.rst new file mode 100644 index 000000000..8e4e625f0 --- /dev/null +++ b/doc/source/reference/c-api.ufunc.rst @@ -0,0 +1,335 @@ +UFunc API +========= + +.. sectionauthor:: Travis E. Oliphant + +.. index:: + pair: ufunc; C-API + + +Constants +--------- + +.. cvar:: UFUNC_ERR_{HANDLER} + + ``{HANDLER}`` can be **IGNORE**, **WARN**, **RAISE**, or **CALL** + +.. cvar:: UFUNC_{THING}_{ERR} + + ``{THING}`` can be **MASK**, **SHIFT**, or **FPE**, and ``{ERR}`` can + be **DIVIDEBYZERO**, **OVERFLOW**, **UNDERFLOW**, and **INVALID**. + +.. cvar:: PyUFunc_{VALUE} + + ``{VALUE}`` can be **One** (1), **Zero** (0), or **None** (-1) + + +Macros +------ + +.. cmacro:: NPY_LOOP_BEGIN_THREADS + + Used in universal function code to only release the Python GIL if + loop->obj is not true (*i.e.* this is not an OBJECT array + loop). Requires use of :cmacro:`NPY_BEGIN_THREADS_DEF` in variable + declaration area. + +.. cmacro:: NPY_LOOP_END_THREADS + + Used in universal function code to re-acquire the Python GIL if it + was released (because loop->obj was not true). + +.. cfunction:: UFUNC_CHECK_ERROR(loop) + + A macro used internally to check for errors and goto fail if + found. This macro requires a fail label in the current code + block. The *loop* variable must have at least members (obj, + errormask, and errorobj). If *loop* ->obj is nonzero, then + :cfunc:`PyErr_Occurred` () is called (meaning the GIL must be held). 
If + *loop* ->obj is zero, then if *loop* ->errormask is nonzero, + :cfunc:`PyUFunc_checkfperr` is called with arguments *loop* ->errormask + and *loop* ->errobj. If the result of this check of the IEEE + floating point registers is true then the code redirects to the + fail label which must be defined. + +.. cfunction:: UFUNC_CHECK_STATUS(ret) + + A macro that expands to platform-dependent code. The *ret* + variable can can be any integer. The :cdata:`UFUNC_FPE_{ERR}` bits are + set in *ret* according to the status of the corresponding error + flags of the floating point processor. + + +Functions +--------- + +.. cfunction:: PyObject* PyUFunc_FromFuncAndData(PyUFuncGenericFunction* func, void** data, char* types, int ntypes, int nin, int nout, int identity, char* name, char* doc, int check_return) + + Create a new broadcasting universal function from required variables. + Each ufunc builds around the notion of an element-by-element + operation. Each ufunc object contains pointers to 1-d loops + implementing the basic functionality for each supported type. + + :param nin: + + The number of inputs to this operation. + + :param nout: + + The number of outputs + + :param ntypes: + + How many different data-type "signatures" the ufunc has implemented. + + :param func: + + Must to an array of length *ntypes* containing + :ctype:`PyUFuncGenericFunction` items. These items are pointers to + functions that acutally implement the underlying + (element-by-element) function :math:`N` times. T + + :param types: + + Must be of length (*nin* + *nout*) \* *ntypes*, and it + contains the data-types (built-in only) that the corresponding + function in the *func* array can deal with. + + :param data: + + Should be ``NULL`` or a pointer to an array of size *ntypes* + . This array may contain arbitrary extra-data to be passed to + the corresponding 1-d loop function in the func array. + + :param name: + + The name for the ufunc. 
+ + :param doc: + + Allows passing in a documentation string to be stored with the + ufunc. The documentation string should not contain the name + of the function or the calling signature as that will be + dynamically determined from the object and available when + accessing the **__doc__** attribute of the ufunc. + + :param check_return: + + Unused and present for backwards compatibility of the C-API. A + corresponding *check_return* integer does exist in the ufunc + structure and it does get set with this value when the ufunc + object is created. + +.. cfunction:: int PyUFunc_RegisterLoopForType(PyUFuncObject* ufunc, int usertype, PyUFuncGenericFunction function, int* arg_types, void* data) + + This function allows the user to register a 1-d loop with an + already- created ufunc to be used whenever the ufunc is called + with any of its input arguments as the user-defined + data-type. This is needed in order to make ufuncs work with + built-in data-types. The data-type must have been previously + registered with the numpy system. The loop is passed in as + *function*. This loop can take arbitrary data which should be + passed in as *data*. The data-types the loop requires are passed + in as *arg_types* which must be a pointer to memory at least as + large as ufunc->nargs. + +.. cfunction:: int PyUFunc_ReplaceLoopBySignature(PyUFuncObject* ufunc, PyUFuncGenericFunction newfunc, int* signature, PyUFuncGenericFunction* oldfunc) + + Replace a 1-d loop matching the given *signature* in the + already-created *ufunc* with the new 1-d loop newfunc. Return the + old 1-d loop function in *oldfunc*. Return 0 on success and -1 on + failure. This function works only with built-in types (use + :cfunc:`PyUFunc_RegisterLoopForType` for user-defined types). A + signature is an array of data-type numbers indicating the inputs + followed by the outputs assumed by the 1-d loop. + +.. 
cfunction:: int PyUFunc_GenericFunction(PyUFuncObject* self, PyObject* args, PyArrayObject** mps) + + A generic ufunc call. The ufunc is passed in as *self*, the + arguments to the ufunc as *args*. The *mps* argument is an array + of :ctype:`PyArrayObject` pointers containing the converted input + arguments as well as the ufunc outputs on return. The user is + responsible for managing this array and receives a new reference + for each array in *mps*. The total number of arrays in *mps* is + given by *self* ->nin + *self* ->nout. + +.. cfunction:: int PyUFunc_checkfperr(int errmask, PyObject* errobj) + + A simple interface to the IEEE error-flag checking support. The + *errmask* argument is a mask of :cdata:`UFUNC_MASK_{ERR}` bitmasks + indicating which errors to check for (and how to check for + them). The *errobj* must be a Python tuple with two elements: a + string containing the name which will be used in any communication + of error and either a callable Python object (call-back function) + or :cdata:`Py_None`. The callable object will only be used if + :cdata:`UFUNC_ERR_CALL` is set as the desired error checking + method. This routine manages the GIL and is safe to call even + after releasing the GIL. If an error in the IEEE-compatibile + hardware is determined a -1 is returned, otherwise a 0 is + returned. + +.. cfunction:: void PyUFunc_clearfperr() + + Clear the IEEE error flags. + +.. cfunction:: void PyUFunc_GetPyValues(char* name, int* bufsize, int* errmask, PyObject** errobj) + + Get the Python values used for ufunc processing from the + thread-local storage area unless the defaults have been set in + which case the name lookup is bypassed. The name is placed as a + string in the first element of *\*errobj*. The second element is + the looked-up function to call on error callback. The value of the + looked-up buffer-size to use is passed into *bufsize*, and the + value of the error mask is placed into *errmask*. 
+ + +Generic functions +----------------- + +At the core of every ufunc is a collection of type-specific functions +that defines the basic functionality for each of the supported types. +These functions must evaluate the underlying function :math:`N\geq1` +times. Extra-data may be passed in that may be used during the +calculation. This feature allows some general functions to be used as +these basic looping functions. The general function has all the code +needed to point variables to the right place and set up a function +call. The general function assumes that the actual function to call is +passed in as the extra data and calls it with the correct values. All +of these functions are suitable for placing directly in the array of +functions stored in the functions member of the PyUFuncObject +structure. + +.. cfunction:: void PyUFunc_f_f_As_d_d(char** args, npy_intp* dimensions, npy_intp* steps, void* func) + +.. cfunction:: void PyUFunc_d_d(char** args, npy_intp* dimensions, npy_intp* steps, void* func) + +.. cfunction:: void PyUFunc_f_f(char** args, npy_intp* dimensions, npy_intp* steps, void* func) + +.. cfunction:: void PyUFunc_g_g(char** args, npy_intp* dimensions, npy_intp* steps, void* func) + +.. cfunction:: void PyUFunc_F_F_As_D_D(char** args, npy_intp* dimensions, npy_intp* steps, void* func) + +.. cfunction:: void PyUFunc_F_F(char** args, npy_intp* dimensions, npy_intp* steps, void* func) + +.. cfunction:: void PyUFunc_D_D(char** args, npy_intp* dimensions, npy_intp* steps, void* func) + +.. cfunction:: void PyUFunc_G_G(char** args, npy_intp* dimensions, npy_intp* steps, void* func) + + Type specific, core 1-d functions for ufuncs where each + calculation is obtained by calling a function taking one input + argument and returning one output. This function is passed in + ``func``. 
The letters correspond to dtypechar's of the supported + data types ( ``f`` - float, ``d`` - double, ``g`` - long double, + ``F`` - cfloat, ``D`` - cdouble, ``G`` - clongdouble). The + argument *func* must support the same signature. The _As_X_X + variants assume ndarray's of one data type but cast the values to + use an underlying function that takes a different data type. Thus, + :cfunc:`PyUFunc_f_f_As_d_d` uses ndarrays of data type :cdata:`NPY_FLOAT` + but calls out to a C-function that takes double and returns + double. + +.. cfunction:: void PyUFunc_ff_f_As_dd_d(char** args, npy_intp* dimensions, npy_intp* steps, void* func) + +.. cfunction:: void PyUFunc_ff_f(char** args, npy_intp* dimensions, npy_intp* steps, void* func) + +.. cfunction:: void PyUFunc_dd_d(char** args, npy_intp* dimensions, npy_intp* steps, void* func) + +.. cfunction:: void PyUFunc_gg_g(char** args, npy_intp* dimensions, npy_intp* steps, void* func) + +.. cfunction:: void PyUFunc_FF_F_As_DD_D(char** args, npy_intp* dimensions, npy_intp* steps, void* func) + +.. cfunction:: void PyUFunc_DD_D(char** args, npy_intp* dimensions, npy_intp* steps, void* func) + +.. cfunction:: void PyUFunc_FF_F(char** args, npy_intp* dimensions, npy_intp* steps, void* func) + +.. cfunction:: void PyUFunc_GG_G(char** args, npy_intp* dimensions, npy_intp* steps, void* func) + + Type specific, core 1-d functions for ufuncs where each + calculation is obtained by calling a function taking two input + arguments and returning one output. The underlying function to + call is passed in as *func*. The letters correspond to + dtypechar's of the specific data type supported by the + general-purpose function. The argument ``func`` must support the + corresponding signature. The ``_As_XX_X`` variants assume ndarrays + of one data type but cast the values at each iteration of the loop + to use the underlying function that takes a different data type. + +.. 
cfunction:: void PyUFunc_O_O(char** args, npy_intp* dimensions, npy_intp* steps, void* func) + +.. cfunction:: void PyUFunc_OO_O(char** args, npy_intp* dimensions, npy_intp* steps, void* func) + + One-input, one-output, and two-input, one-output core 1-d functions + for the :cdata:`NPY_OBJECT` data type. These functions handle reference count + issues and return early on error. The actual function to call is *func* + and it must accept calls with the signature ``(PyObject*)(PyObject*)`` + for :cfunc:`PyUFunc_O_O` or ``(PyObject*)(PyObject *, PyObject *)`` + for :cfunc:`PyUFunc_OO_O`. + +.. cfunction:: void PyUFunc_O_O_method(char** args, npy_intp* dimensions, npy_intp* steps, void* func) + + This general purpose 1-d core function assumes that *func* is a string + representing a method of the input object. For each + iteration of the loop, the Python obejct is extracted from the array + and its *func* method is called returning the result to the output array. + +.. cfunction:: void PyUFunc_OO_O_method(char** args, npy_intp* dimensions, npy_intp* steps, void* func) + + This general purpose 1-d core function assumes that *func* is a + string representing a method of the input object that takes one + argument. The first argument in *args* is the method whose function is + called, the second argument in *args* is the argument passed to the + function. The output of the function is stored in the third entry + of *args*. + +.. cfunction:: void PyUFunc_On_Om(char** args, npy_intp* dimensions, npy_intp* steps, void* func) + + This is the 1-d core function used by the dynamic ufuncs created + by umath.frompyfunc(function, nin, nout). In this case *func* is a + pointer to a :ctype:`PyUFunc_PyFuncData` structure which has definition + + .. ctype:: PyUFunc_PyFuncData + + .. 
code-block:: c + + typedef struct { + int nin; + int nout; + PyObject *callable; + } PyUFunc_PyFuncData; + + At each iteration of the loop, the *nin* input objects are exctracted + from their object arrays and placed into an argument tuple, the Python + *callable* is called with the input arguments, and the nout + outputs are placed into their object arrays. + + +Importing the API +----------------- + +.. cvar:: PY_UFUNC_UNIQUE_SYMBOL + +.. cvar:: NO_IMPORT_UFUNC + +.. cfunction:: void import_ufunc(void) + + These are the constants and functions for accessing the ufunc + C-API from extension modules in precisely the same way as the + array C-API can be accessed. The ``import_ufunc`` () function must + always be called (in the initialization subroutine of the + extension module). If your extension module is in one file then + that is all that is required. The other two constants are useful + if your extension module makes use of multiple files. In that + case, define :cdata:`PY_UFUNC_UNIQUE_SYMBOL` to something unique to + your code and then in source files that do not contain the module + initialization function but still need access to the UFUNC API, + define :cdata:`PY_UFUNC_UNIQUE_SYMBOL` to the same name used previously + and also define :cdata:`NO_IMPORT_UFUNC`. + + The C-API is actually an array of function pointers. This array is + created (and pointed to by a global variable) by import_ufunc. The + global variable is either statically defined or allowed to be seen + by other files depending on the state of + :cdata:`Py_UFUNC_UNIQUE_SYMBOL` and :cdata:`NO_IMPORT_UFUNC`. + +.. index:: + pair: ufunc; C-API diff --git a/doc/source/reference/distutils.rst b/doc/source/reference/distutils.rst new file mode 100644 index 000000000..b01c0bfc5 --- /dev/null +++ b/doc/source/reference/distutils.rst @@ -0,0 +1,413 @@ +********************************** +Packaging (:mod:`numpy.distutils`) +********************************** + +.. 
module:: numpy.distutils + +NumPy provides enhanced distutils functionality to make it easier to +build and install sub-packages, auto-generate code, and extension +modules that use Fortran-compiled libraries. To use features of numpy +distutils, use the :func:`setup <core.setup>` command from +:mod:`numpy.distutils.core`. A useful :class:`Configuration +<misc_util.Configuration>` class is also provided in +:mod:`numpy.distutils.misc_util` that can make it easier to construct +keyword arguments to pass to the setup function (by passing the +dictionary obtained from the todict() method of the class). More +information is available in the NumPy Distutils Users Guide in +``<site-packages>/numpy/doc/DISTUTILS.txt``. + +.. index:: + single: distutils + + +Modules in :mod:`numpy.distutils` +================================= + +misc_util +--------- + +.. module:: numpy.distutils.misc_util + +.. autosummary:: + :toctree: generated/ + + Configuration + get_numpy_include_dirs + get_numarray_include_dirs + dict_append + appendpath + allpath + dot_join + generate_config_py + get_cmd + terminal_has_colors + red_text + green_text + yellow_text + blue_text + cyan_text + cyg2win32 + all_strings + has_f_sources + has_cxx_sources + filter_sources + get_dependencies + is_local_src_dir + get_ext_source_files + get_script_files + + +.. class:: Configuration(package_name=None, parent_name=None, top_path=None, package_path=None, **attrs) + + Construct a configuration instance for the given package name. If + *parent_name* is not :const:`None`, then construct the package as a + sub-package of the *parent_name* package. If *top_path* and + *package_path* are :const:`None` then they are assumed equal to + the path of the file this instance was created in. The setup.py + files in the numpy distribution are good examples of how to use + the :class:`Configuration` instance. + + .. method:: todict() + + Return a dictionary compatible with the keyword arguments of distutils + setup function. 
Thus, this method may be used as + setup(\**config.todict()). + + .. method:: get_distribution() + + Return the distutils distribution object for self. + + .. method:: get_subpackage(subpackage_name, subpackage_path=None) + + Return a Configuration instance for the sub-package given. If + subpackage_path is None then the path is assumed to be the local path + plus the subpackage_name. If a setup.py file is not found in the + subpackage_path, then a default configuration is used. + + .. method:: add_subpackage(subpackage_name, subpackage_path=None) + + Add a sub-package to the current Configuration instance. This is + useful in a setup.py script for adding sub-packages to a package. The + sub-package is contained in subpackage_path / subpackage_name and this + directory may contain a setup.py script or else a default setup + (suitable for Python-code-only subpackages) is assumed. If the + subpackage_path is None, then it is assumed to be located in the local + path / subpackage_name. + + .. method:: self.add_data_files(*files) + + Add files to the list of data_files to be included with the package. + The form of each element of the files sequence is very flexible + allowing many combinations of where to get the files from the package + and where they should ultimately be installed on the system. The most + basic usage is for an element of the files argument sequence to be a + simple filename. This will cause that file from the local path to be + installed to the installation path of the self.name package (package + path). The file argument can also be a relative path in which case the + entire relative path will be installed into the package directory. + Finally, the file can be an absolute path name in which case the file + will be found at the absolute path name but installed to the package + path. + + This basic behavior can be augmented by passing a 2-tuple in as the + file argument. 
The first element of the tuple should specify the + relative path (under the package install directory) where the + remaining sequence of files should be installed to (it has nothing to + do with the file-names in the source distribution). The second element + of the tuple is the sequence of files that should be installed. The + files in this sequence can be filenames, relative paths, or absolute + paths. For absolute paths the file will be installed in the top-level + package installation directory (regardless of the first argument). + Filenames and relative path names will be installed in the package + install directory under the path name given as the first element of + the tuple. An example may clarify:: + + self.add_data_files('foo.dat', + ('fun', ['gun.dat', 'nun/pun.dat', '/tmp/sun.dat']), + 'bar/cat.dat', + '/full/path/to/can.dat') + + will install these data files to:: + + <package install directory>/ + foo.dat + fun/ + gun.dat + nun/ + pun.dat + sun.dat + bar/ + car.dat + can.dat + + where <package install directory> is the package (or sub-package) + directory such as '/usr/lib/python2.4/site-packages/mypackage' ('C: \\Python2.4 \\Lib \\site-packages \\mypackage') or '/usr/lib/python2.4/site- + packages/mypackage/mysubpackage' ('C: \\Python2.4 \\Lib \\site-packages \\mypackage \\mysubpackage'). + + + An additional feature is that the path to a data-file can actually be + a function that takes no arguments and returns the actual path(s) to + the data-files. This is useful when the data files are generated while + building the package. + + .. method:: add_data_dir(data_path) + + Recursively add files under data_path to the list of data_files to be + installed (and distributed). The data_path can be either a relative + path-name, or an absolute path-name, or a 2-tuple where the first + argument shows where in the install directory the data directory + should be installed to. 
For example suppose the source directory + contains fun/foo.dat and fun/bar/car.dat:: + + self.add_data_dir('fun') + self.add_data_dir(('sun', 'fun')) + self.add_data_dir(('gun', '/full/path/to/fun')) + + Will install data-files to the locations:: + + <package install directory>/ + fun/ + foo.dat + bar/ + car.dat + sun/ + foo.dat + bar/ + car.dat + gun/ + foo.dat + car.dat + + .. method:: add_include_dirs(*paths) + + Add the given sequence of paths to the beginning of the include_dirs + list. This list will be visible to all extension modules of the + current package. + + .. method:: add_headers(*files) + + Add the given sequence of files to the beginning of the headers list. + By default, headers will be installed under <python- + include>/<self.name.replace('.','/')>/ directory. If an item of files + is a tuple, then its first argument specifies the actual installation + location relative to the <python-include> path. + + .. method:: add_extension(name, sources, **kw) + + Create and add an Extension instance to the ext_modules list. The + first argument defines the name of the extension module that will be + installed under the self.name package. The second argument is a list + of sources. This method also takes the following optional keyword + arguments that are passed on to the Extension constructor: + include_dirs, define_macros, undef_macros, library_dirs, libraries, + runtime_library_dirs, extra_objects, swig_opts, depends, language, + f2py_options, module_dirs, and extra_info. + + The self.paths(...) method is applied to all lists that may contain + paths. The extra_info is a dictionary or a list of dictionaries whose + content will be appended to the keyword arguments. The depends list + contains paths to files or directories that the sources of the + extension module depend on. If any path in the depends list is newer + than the extension module, then the module will be rebuilt. 
+ + The list of sources may contain functions (called source generators) + which must take an extension instance and a build directory as inputs + and return a source file or list of source files or None. If None is + returned then no sources are generated. If the Extension instance has + no sources after processing all source generators, then no extension + module is built. + + .. method:: add_library(name, sources, **build_info) + + Add a library to the list of libraries. Allowed keyword arguments are + depends, macros, include_dirs, extra_compiler_args, and f2py_options. + The name is the name of the library to be built and sources is a list + of sources (or source generating functions) to add to the library. + + .. method:: add_scripts(*files) + + Add the sequence of files to the beginning of the scripts list. + Scripts will be installed under the <prefix>/bin/ directory. + + .. method:: paths(*paths) + + Applies glob.glob(...) to each path in the sequence (if needed) and + pre-pends the local_path if needed. Because this is called on all + source lists, this allows wildcard characters to be specified in lists + of sources for extension modules and libraries and scripts and allows + path-names be relative to the source directory. + + .. method:: get_config_cmd() + + Returns the numpy.distutils config command instance. + + .. method:: get_build_temp_dir() + + Return a path to a temporary directory where temporary files should be + placed. + + .. method:: have_f77c() + + True if a Fortran 77 compiler is available (because a simple Fortran + 77 code was able to be compiled successfully). + + .. method:: have_f90c() + + True if a Fortran 90 compiler is available (because a simple Fortran + 90 code was able to be compiled successfully) + + .. method:: get_version() + + Return a version string of the current package or None if the version + information could not be detected. 
This method scans files named + __version__.py, <packagename>_version.py, version.py, and + __svn_version__.py for string variables version, __version\__, and + <packagename>_version, until a version number is found. + + .. method:: make_svn_version_py() + + Appends a data function to the data_files list that will generate + __svn_version__.py file to the current package directory. This file + will be removed from the source directory when Python exits (so that + it can be re-generated next time the package is built). This is + intended for working with source directories that are in an SVN + repository. + + .. method:: make_config_py() + + Generate a package __config__.py file containing system information + used during the building of the package. This file is installed to the + package installation directory. + + .. method:: get_info(*names) + + Return information (from system_info.get_info) for all of the names in + the argument list in a single dictionary. + + +Other modules +------------- + +.. currentmodule:: numpy.distutils + +.. autosummary:: + :toctree: generated/ + + system_info.get_info + system_info.get_standard_file + cpuinfo.cpu + log.set_verbosity + exec_command + + +Conversion of ``.src`` files +============================ + +NumPy distutils supports automatic conversion of source files named +<somefile>.src. This facility can be used to maintain very similar +code blocks requiring only simple changes between blocks. During the +build phase of setup, if a template file named <somefile>.src is +encountered, a new file named <somefile> is constructed from the +template and placed in the build directory to be used instead. Two +forms of template conversion are supported. The first form occurs for +files named named <file>.ext.src where ext is a recognized Fortran +extension (f, f90, f95, f77, for, ftn, pyf). The second form is used +for all other cases. + +.. 
index::
+ single: code generation
+
+Fortran files
+-------------
+
+This template converter will replicate all **function** and
+**subroutine** blocks in the file with names that contain '<...>'
+according to the rules in '<...>'. The number of comma-separated words
+in '<...>' determines the number of times the block is repeated. What
+these words are indicates what that repeat rule, '<...>', should be
+replaced with in each block. All of the repeat rules in a block must
+contain the same number of comma-separated words indicating the number
+of times that block should be repeated. If the word in the repeat rule
+needs a comma, leftarrow, or rightarrow, then prepend it with a
+backslash ' \'. If a word in the repeat rule matches ' \\<index>' then
+it will be replaced with the <index>-th word in the same repeat
+specification. There are two forms for the repeat rule: named and
+short.
+
+
+Named repeat rule
+^^^^^^^^^^^^^^^^^
+
+A named repeat rule is useful when the same set of repeats must be
+used several times in a block. It is specified using <rule1=item1,
+item2, item3,..., itemN>, where N is the number of times the block
+should be repeated. On each repeat of the block, the entire
+expression, '<...>' will be replaced first with item1, and then with
+item2, and so forth until N repeats are accomplished. Once a named
+repeat specification has been introduced, the same repeat rule may be
+used **in the current block** by referring only to the name
+(i.e. <rule1>).
+
+
+Short repeat rule
+^^^^^^^^^^^^^^^^^
+
+A short repeat rule looks like <item1, item2, item3, ..., itemN>. The
+rule specifies that the entire expression, '<...>' should be replaced
+first with item1, and then with item2, and so forth until N repeats
+are accomplished.
+
+
+Pre-defined names
+^^^^^^^^^^^^^^^^^
+
+The following predefined named repeat rules are available:
+
+- <prefix=s,d,c,z>
+
+- <_c=s,d,c,z>
+
+- <_t=real, double precision, complex, double complex>
+
+- <ftype=real, double precision, complex, double complex>
+
+- <ctype=float, double, complex_float, complex_double>
+
+- <ftypereal=float, double precision, \\0, \\1>
+
+- <ctypereal=float, double, \\0, \\1>
+
+
+Other files
+-----------
+
+Non-Fortran files use a separate syntax for defining template blocks
+that should be repeated using a variable expansion similar to the
+named repeat rules of the Fortran-specific repeats. The template rules
+for these files are:
+
+1. "/\**begin repeat "on a line by itself marks the beginning of
+ a segment that should be repeated.
+
+2. Named variable expansions are defined using #name=item1, item2, item3,
+ ..., itemN# and placed on successive lines. These variables are
+ replaced in each repeat block with corresponding word. All named
+ variables in the same repeat block must define the same number of
+ words.
+
+3. In specifying the repeat rule for a named variable, item*N is short-
+ hand for item, item, ..., item repeated N times. In addition,
+ parentheses in combination with \*N can be used for grouping several
+ items that should be repeated. Thus, #name=(item1, item2)*4# is
+ equivalent to #name=item1, item2, item1, item2, item1, item2, item1,
+ item2#
+
+4. "\*/ "on a line by itself marks the end of the variable expansion
+ naming. The next line is the first line that will be repeated using
+ the named rules.
+
+5. Inside the block to be repeated, the variables that should be expanded
+ are specified as @name@.
+
+6. "/\**end repeat**/ "on a line by itself marks the previous line
+ as the last line of the block to be repeated.
diff --git a/doc/source/reference/figures/dtype-hierarchy.dia b/doc/source/reference/figures/dtype-hierarchy.dia Binary files differnew file mode 100644 index 000000000..65379b880 --- /dev/null +++ b/doc/source/reference/figures/dtype-hierarchy.dia diff --git a/doc/source/reference/figures/dtype-hierarchy.pdf b/doc/source/reference/figures/dtype-hierarchy.pdf Binary files differnew file mode 100644 index 000000000..6ce496a3e --- /dev/null +++ b/doc/source/reference/figures/dtype-hierarchy.pdf diff --git a/doc/source/reference/figures/dtype-hierarchy.png b/doc/source/reference/figures/dtype-hierarchy.png Binary files differnew file mode 100644 index 000000000..5722ac527 --- /dev/null +++ b/doc/source/reference/figures/dtype-hierarchy.png diff --git a/doc/source/reference/figures/threefundamental.fig b/doc/source/reference/figures/threefundamental.fig new file mode 100644 index 000000000..79760c410 --- /dev/null +++ b/doc/source/reference/figures/threefundamental.fig @@ -0,0 +1,57 @@ +#FIG 3.2 +Landscape +Center +Inches +Letter +100.00 +Single +-2 +1200 2 +6 1950 2850 4350 3450 +2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 + 1950 2850 4350 2850 4350 3450 1950 3450 1950 2850 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 2550 2850 2550 3450 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 3150 2850 3150 3450 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 3750 2850 3750 3450 +-6 +2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 + 5100 2850 7500 2850 7500 3450 5100 3450 5100 2850 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 5700 2850 5700 3450 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 6300 2850 6300 3450 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 + 6900 2850 6900 3450 +2 4 0 1 0 7 50 -1 -1 0.000 0 0 7 0 0 5 + 7800 3600 7800 2700 525 2700 525 3600 7800 3600 +2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 + 675 2850 1725 2850 1725 3450 675 3450 675 2850 +2 2 0 4 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 + 5700 2850 6300 2850 6300 3450 5700 3450 5700 2850 +2 2 0 4 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 + 5700 1725 6300 1725 
6300 2325 5700 2325 5700 1725 +2 4 0 1 0 7 50 -1 -1 0.000 0 0 7 0 0 5 + 6450 2475 6450 1275 5550 1275 5550 2475 6450 2475 +2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 + 5700 1350 6300 1350 6300 1575 5700 1575 5700 1350 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 3 + 2 1 1.00 60.00 120.00 + 900 2850 900 1875 1575 1875 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 + 2 1 1.00 60.00 120.00 + 3375 1800 5550 1800 +2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 + 2 1 1.00 60.00 120.00 + 6000 2850 6000 2325 +2 4 0 1 0 7 50 -1 -1 0.000 0 0 7 0 0 5 + 3375 2100 3375 1575 1575 1575 1575 2100 3375 2100 +4 0 0 50 -1 18 14 0.0000 4 165 720 825 3225 header\001 +4 0 0 50 -1 2 40 0.0000 4 105 450 4500 3225 ...\001 +4 0 0 50 -1 18 14 0.0000 4 210 810 3600 3900 ndarray\001 +4 0 0 50 -1 18 14 0.0000 4 165 630 6600 2175 scalar\001 +4 0 0 50 -1 18 14 0.0000 4 165 540 6600 1950 array\001 +4 0 0 50 -1 16 12 0.0000 4 135 420 5775 1500 head\001 +4 0 0 50 -1 18 14 0.0000 4 210 975 1950 1875 data-type\001 diff --git a/doc/source/reference/figures/threefundamental.pdf b/doc/source/reference/figures/threefundamental.pdf Binary files differnew file mode 100644 index 000000000..b89e9f2af --- /dev/null +++ b/doc/source/reference/figures/threefundamental.pdf diff --git a/doc/source/reference/figures/threefundamental.png b/doc/source/reference/figures/threefundamental.png Binary files differnew file mode 100644 index 000000000..de252fc9d --- /dev/null +++ b/doc/source/reference/figures/threefundamental.png diff --git a/doc/source/reference/index.rst b/doc/source/reference/index.rst new file mode 100644 index 000000000..e954c724c --- /dev/null +++ b/doc/source/reference/index.rst @@ -0,0 +1,43 @@ +.. _reference: + +############### +NumPy Reference +############### + +:Release: |version| +:Date: |today| + +.. module:: numpy + +This reference manual details functions, modules, and objects +included in Numpy, describing what they are and what they do. +For learning how to use NumPy, see also :ref:`user`. + + +.. 
toctree:: + :maxdepth: 2 + + arrays + ufuncs + routines + ctypes + distutils + c-api + internals + + +Acknowledgements +================ + +Large parts of this manual originate from Travis E. Oliphant's book +`Guide to Numpy <http://www.tramy.us/>`__ (which generously entered +Public Domain in August 2008). The reference documentation for many of +the functions are written by numerous contributors and developers of +Numpy, both prior to and during the +`Numpy Documentation Marathon <http://scipy.org/Developer_Zone/DocMarathon2008>`__. + +The Documentation Marathon is still ongoing. Please help us write +better documentation for Numpy by joining it! Instructions on how to +join and what to do can be found +`on the scipy.org website <http://scipy.org/Developer_Zone/DocMarathon2008>`__ + diff --git a/doc/source/reference/internals.code-explanations.rst b/doc/source/reference/internals.code-explanations.rst new file mode 100644 index 000000000..48f487205 --- /dev/null +++ b/doc/source/reference/internals.code-explanations.rst @@ -0,0 +1,665 @@ +.. currentmodule:: numpy + +************************* +Numpy C Code Explanations +************************* + + Fanaticism consists of redoubling your efforts when you have forgotten + your aim. + --- *George Santayana* + + An authority is a person who can tell you more about something than + you really care to know. + --- *Unknown* + +This Chapter attempts to explain the logic behind some of the new +pieces of code. The purpose behind these explanations is to enable +somebody to be able to understand the ideas behind the implementation +somewhat more easily than just staring at the code. Perhaps in this +way, the algorithms can be improved on, borrowed from, and/or +optimized. + + +Memory model +============ + +.. index:: + pair: ndarray; memory model + +One fundamental aspect of the ndarray is that an array is seen as a +"chunk" of memory starting at some location. 
The interpretation of +this memory depends on the stride information. For each dimension in +an :math:`N` -dimensional array, an integer (stride) dictates how many +bytes must be skipped to get to the next element in that dimension. +Unless you have a single-segment array, this stride information must +be consulted when traversing through an array. It is not difficult to +write code that accepts strides, you just have to use (char \*) +pointers because strides are in units of bytes. Keep in mind also that +strides do not have to be unit-multiples of the element size. Also, +remember that if the number of dimensions of the array is 0 (sometimes +called a rank-0 array), then the strides and dimensions variables are +NULL. + +Besides the structural information contained in the strides and +dimensions members of the :ctype:`PyArrayObject`, the flags contain important +information about how the data may be accessed. In particular, the +:cdata:`NPY_ALIGNED` flag is set when the memory is on a suitable boundary +according to the data-type array. Even if you have a contiguous chunk +of memory, you cannot just assume it is safe to dereference a data- +type-specific pointer to an element. Only if the :cdata:`NPY_ALIGNED` flag is +set is this a safe operation (on some platforms it will work but on +others, like Solaris, it will cause a bus error). The :cdata:`NPY_WRITEABLE` +should also be ensured if you plan on writing to the memory area of +the array. It is also possible to obtain a pointer to an unwriteable +memory area. Sometimes, writing to the memory area when the +:cdata:`NPY_WRITEABLE` flag is not set will just be rude. Other times it can +cause program crashes ( *e.g.* a data-area that is a read-only +memory-mapped file). + + +Data-type encapsulation +======================= + +.. index:: + single: dtype + +The data-type is an important abstraction of the ndarray. 
Operations +will look to the data-type to provide the key functionality that is +needed to operate on the array. This functionality is provided in the +list of function pointers pointed to by the 'f' member of the +:ctype:`PyArray_Descr` structure. In this way, the number of data-types can be +extended simply by providing a :ctype:`PyArray_Descr` structure with suitable +function pointers in the 'f' member. For built-in types there are some +optimizations that by-pass this mechanism, but the point of the data- +type abstraction is to allow new data-types to be added. + +One of the built-in data-types, the void data-type allows for +arbitrary records containing 1 or more fields as elements of the +array. A field is simply another data-type object along with an offset +into the current record. In order to support arbitrarily nested +fields, several recursive implementations of data-type access are +implemented for the void type. A common idiom is to cycle through the +elements of the dictionary and perform a specific operation based on +the data-type object stored at the given offset. These offsets can be +arbitrary numbers. Therefore, the possibility of encountering mis- +aligned data must be recognized and taken into account if necessary. + + +N-D Iterators +============= + +.. index:: + single: array iterator + +A very common operation in much of NumPy code is the need to iterate +over all the elements of a general, strided, N-dimensional array. This +operation of a general-purpose N-dimensional loop is abstracted in the +notion of an iterator object. To write an N-dimensional loop, you only +have to create an iterator object from an ndarray, work with the +dataptr member of the iterator object structure and call the macro +:cfunc:`PyArray_ITER_NEXT` (it) on the iterator object to move to the next +element. The "next" element is always in C-contiguous order. The macro +works by first special casing the C-contiguous, 1-d, and 2-d cases +which work very simply. 
+
+For the general case, the iteration works by keeping track of a list
+of coordinate counters in the iterator object. At each iteration, the
+last coordinate counter is increased (starting from 0). If this
+counter is smaller than one less than the size of the array in that
+dimension (a pre-computed and stored value), then the counter is
+increased and the dataptr member is increased by the strides in that
+dimension and the macro ends. If the end of a dimension is reached,
+the counter for the last dimension is reset to zero and the dataptr is
+moved back to the beginning of that dimension by subtracting the
+strides value times one less than the number of elements in that
+dimension (this is also pre-computed and stored in the backstrides
+member of the iterator object). In this case, the macro does not end,
+but a local dimension counter is decremented so that the next-to-last
+dimension replaces the role that the last dimension played and the
+previously-described tests are executed again on the next-to-last
+dimension. In this way, the dataptr is adjusted appropriately for
+arbitrary striding.
+
+The coordinates member of the :ctype:`PyArrayIterObject` structure maintains
+the current N-d counter unless the underlying array is C-contiguous in
+which case the coordinate counting is by-passed. The index member of
+the :ctype:`PyArrayIterObject` keeps track of the current flat index of the
+iterator. It is updated by the :cfunc:`PyArray_ITER_NEXT` macro.
+
+
+Broadcasting
+============
+
+.. index::
+ single: broadcasting
+
+In Numeric, broadcasting was implemented in several lines of code
+buried deep in ufuncobject.c. In NumPy, the notion of broadcasting has
+been abstracted so that it can be performed in multiple places.
+Broadcasting is handled by the function :cfunc:`PyArray_Broadcast`. This
+function requires a :ctype:`PyArrayMultiIterObject` (or something that is a
+binary equivalent) to be passed in.
The :ctype:`PyArrayMultiIterObject` keeps +track of the broadcasted number of dimensions and size in each +dimension along with the total size of the broadcasted result. It also +keeps track of the number of arrays being broadcast and a pointer to +an iterator for each of the arrays being broadcasted. + +The :cfunc:`PyArray_Broadcast` function takes the iterators that have already +been defined and uses them to determine the broadcast shape in each +dimension (to create the iterators at the same time that broadcasting +occurs then use the :cfunc:`PyMultiIter_New` function). Then, the iterators are +adjusted so that each iterator thinks it is iterating over an array +with the broadcasted size. This is done by adjusting the iterators +number of dimensions, and the shape in each dimension. This works +because the iterator strides are also adjusted. Broadcasting only +adjusts (or adds) length-1 dimensions. For these dimensions, the +strides variable is simply set to 0 so that the data-pointer for the +iterator over that array doesn't move as the broadcasting operation +operates over the extended dimension. + +Broadcasting was always implemented in Numeric using 0-valued strides +for the extended dimensions. It is done in exactly the same way in +NumPy. The big difference is that now the array of strides is kept +track of in a :ctype:`PyArrayIterObject`, the iterators involved in a +broadcasted result are kept track of in a :ctype:`PyArrayMultiIterObject`, +and the :cfunc:`PyArray_BroadCast` call implements the broad-casting rules. + + +Array Scalars +============= + +.. index:: + single: array scalars + +The array scalars offer a hierarchy of Python types that allow a one- +to-one correspondence between the data-type stored in an array and the +Python-type that is returned when an element is extracted from the +array. An exception to this rule was made with object arrays. Object +arrays are heterogeneous collections of arbitrary Python objects. 
When +you select an item from an object array, you get back the original +Python object (and not an object array scalar which does exist but is +rarely used for practical purposes). + +The array scalars also offer the same methods and attributes as arrays +with the intent that the same code can be used to support arbitrary +dimensions (including 0-dimensions). The array scalars are read-only +(immutable) with the exception of the void scalar which can also be +written to so that record-array field setting works more naturally +(a[0]['f1'] = ``value`` ). + + +Advanced ("Fancy") Indexing +============================= + +.. index:: + single: indexing + +The implementation of advanced indexing represents some of the most +difficult code to write and explain. In fact, there are two +implementations of advanced indexing. The first works only with 1-d +arrays and is implemented to handle expressions involving a.flat[obj]. +The second is general-purpose that works for arrays of "arbitrary +dimension" (up to a fixed maximum). The one-dimensional indexing +approaches were implemented in a rather straightforward fashion, and +so it is the general-purpose indexing code that will be the focus of +this section. + +There is a multi-layer approach to indexing because the indexing code +can at times return an array scalar and at other times return an +array. The functions with "_nice" appended to their name do this +special handling while the function without the _nice appendage always +return an array (perhaps a 0-dimensional array). Some special-case +optimizations (the index being an integer scalar, and the index being +a tuple with as many dimensions as the array) are handled in +array_subscript_nice function which is what Python calls when +presented with the code "a[obj]." These optimizations allow fast +single-integer indexing, and also ensure that a 0-dimensional array is +not created only to be discarded as the array scalar is returned +instead. 
This provides significant speed-up for code that is selecting +many scalars out of an array (such as in a loop). However, it is still +not faster than simply using a list to store standard Python scalars, +because that is optimized by the Python interpreter itself. + +After these optimizations, the array_subscript function itself is +called. This function first checks for field selection which occurs +when a string is passed as the indexing object. Then, 0-d arrays are +given special-case consideration. Finally, the code determines whether +or not advanced, or fancy, indexing needs to be performed. If fancy +indexing is not needed, then standard view-based indexing is performed +using code borrowed from Numeric which parses the indexing object and +returns the offset into the data-buffer and the dimensions necessary +to create a new view of the array. The strides are also changed by +multiplying each stride by the step-size requested along the +corresponding dimension. + + +Fancy-indexing check +-------------------- + +The fancy_indexing_check routine determines whether or not to use +standard view-based indexing or new copy-based indexing. If the +indexing object is a tuple, then view-based indexing is assumed by +default. Only if the tuple contains an array object or a sequence +object is fancy-indexing assumed. If the indexing object is an array, +then fancy indexing is automatically assumed. If the indexing object +is any other kind of sequence, then fancy-indexing is assumed by +default. This is over-ridden to simple indexing if the sequence +contains any slice, newaxis, or Ellipsis objects, and no arrays or +additional sequences are also contained in the sequence. The purpose +of this is to allow the construction of "slicing" sequences which is a +common technique for building up code that works in arbitrary numbers +of dimensions. 
+ + +Fancy-indexing implementation +----------------------------- + +The concept of indexing was also abstracted using the idea of an +iterator. If fancy indexing is performed, then a :ctype:`PyArrayMapIterObject` +is created. This internal object is not exposed to Python. It is +created in order to handle the fancy-indexing at a high-level. Both +get and set fancy-indexing operations are implemented using this +object. Fancy indexing is abstracted into three separate operations: +(1) creating the :ctype:`PyArrayMapIterObject` from the indexing object, (2) +binding the :ctype:`PyArrayMapIterObject` to the array being indexed, and (3) +getting (or setting) the items determined by the indexing object. +There is an optimization implemented so that the :ctype:`PyArrayIterObject` +(which has it's own less complicated fancy-indexing) is used for +indexing when possible. + + +Creating the mapping object +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The first step is to convert the indexing objects into a standard form +where iterators are created for all of the index array inputs and all +Boolean arrays are converted to equivalent integer index arrays (as if +nonzero(arr) had been called). Finally, all integer arrays are +replaced with the integer 0 in the indexing object and all of the +index-array iterators are "broadcast" to the same shape. + + +Binding the mapping object +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When the mapping object is created it does not know which array it +will be used with so once the index iterators are constructed during +mapping-object creation, the next step is to associate these iterators +with a particular ndarray. This process interprets any ellipsis and +slice objects so that the index arrays are associated with the +appropriate axis (the axis indicated by the iteraxis entry +corresponding to the iterator for the integer index array). This +information is then used to check the indices to be sure they are +within range of the shape of the array being indexed. 
The presence of +ellipsis and/or slice objects implies a sub-space iteration that is +accomplished by extracting a sub-space view of the array (using the +index object resulting from replacing all the integer index arrays +with 0) and storing the information about where this sub-space starts +in the mapping object. This is used later during mapping-object +iteration to select the correct elements from the underlying array. + + +Getting (or Setting) +^^^^^^^^^^^^^^^^^^^^ + +After the mapping object is successfully bound to a particular array, +the mapping object contains the shape of the resulting item as well as +iterator objects that will walk through the currently-bound array and +either get or set its elements as needed. The walk is implemented +using the :cfunc:`PyArray_MapIterNext` function. This function sets the +coordinates of an iterator object into the current array to be the +next coordinate location indicated by all of the indexing-object +iterators while adjusting, if necessary, for the presence of a sub- +space. The result of this function is that the dataptr member of the +mapping object structure is pointed to the next position in the array +that needs to be copied out or set to some value. + +When advanced indexing is used to extract an array, an iterator for +the new array is constructed and advanced in phase with the mapping +object iterator. When advanced indexing is used to place values in an +array, a special "broadcasted" iterator is constructed from the object +being placed into the array so that it will only work if the values +used for setting have a shape that is "broadcastable" to the shape +implied by the indexing object. + + +Universal Functions +=================== + +.. 
index::
+ single: ufunc
+
+Universal functions are callable objects that take :math:`N` inputs
+and produce :math:`M` outputs by wrapping basic 1-d loops that work
+element-by-element into full easy-to-use functions that seamlessly
+implement broadcasting, type-checking and buffered coercion, and
+output-argument handling. New universal functions are normally created
+in C, although there is a mechanism for creating ufuncs from Python
+functions (:func:`frompyfunc`). The user must supply a 1-d loop that
+implements the basic function taking the input scalar values and
+placing the resulting scalars into the appropriate output slots as
+explained in the implementation.
+
+
+Setup
+-----
+
+Every ufunc calculation involves some overhead related to setting up
+the calculation. The practical significance of this overhead is that
+even though the actual calculation of the ufunc is very fast, you will
+be able to write array and type-specific code that will work faster
+for small arrays than the ufunc. In particular, using ufuncs to
+perform many calculations on 0-d arrays will be slower than other
+Python-based solutions (the silently-imported scalarmath module exists
+precisely to give array scalars the look-and-feel of ufunc-based
+calculations with significantly reduced overhead).
+
+When a ufunc is called, many things must be done. The information
+collected from these setup operations is stored in a loop-object. This
+loop object is a C-structure (that could become a Python object but is
+not initialized as such because it is only used internally). This loop
+object has the layout needed to be used with PyArray_Broadcast so that
+the broadcasting can be handled in the same way as it is handled in
+other sections of code.
+
+The first thing done is to look-up in the thread-specific global
+dictionary the current values for the buffer-size, the error mask, and
+the associated error object.
The state of the error mask controls what
+happens when an error-condition is found. It should be noted that
+checking of the hardware error flags is only performed after each 1-d
+loop is executed. This means that if the input and output arrays are
+contiguous and of the correct type so that a single 1-d loop is
+performed, then the flags may not be checked until all elements of the
+array have been calculated. Looking up these values in a thread-
+specific dictionary takes time which is easily ignored for all but
+very small arrays.
+
+After checking the thread-specific global variables, the inputs are
+evaluated to determine how the ufunc should proceed and the input and
+output arrays are constructed if necessary. Any inputs which are not
+arrays are converted to arrays (using context if necessary). Which of
+the inputs are scalars (and therefore converted to 0-d arrays) is
+noted.
+
+Next, an appropriate 1-d loop is selected from the 1-d loops available
+to the ufunc based on the input array types. This 1-d loop is selected
+by trying to match the signature of the data-types of the inputs
+against the available signatures. The signatures corresponding to
+built-in types are stored in the types member of the ufunc structure.
+The signatures corresponding to user-defined types are stored in a
+linked-list of function-information with the head element stored as a
+``CObject`` in the userloops dictionary keyed by the data-type number
+(the first user-defined type in the argument list is used as the key).
+The signatures are searched until a signature is found to which the
+input arrays can all be cast safely (ignoring any scalar arguments
+which are not allowed to determine the type of the result). The
+implication of this search procedure is that "lesser types" should be
+placed below "larger types" when the signatures are stored. If no 1-d
+loop is found, then an error is reported.
Otherwise, the argument_list +is updated with the stored signature --- in case casting is necessary +and to fix the output types assumed by the 1-d loop. + +If the ufunc has 2 inputs and 1 output and the second input is an +Object array then a special-case check is performed so that +NotImplemented is returned if the second input is not an ndarray, has +the __array_priority\__ attribute, and has an __r{op}\__ special +method. In this way, Python is signaled to give the other object a +chance to complete the operation instead of using generic object-array +calculations. This allows (for example) sparse matrices to override +the multiplication operator 1-d loop. + +For input arrays that are smaller than the specified buffer size, +copies are made of all non-contiguous, mis-aligned, or out-of- +byteorder arrays to ensure that for small arrays, a single-loop is +used. Then, array iterators are created for all the input arrays and +the resulting collection of iterators is broadcast to a single shape. + +The output arguments (if any) are then processed and any missing +return arrays are constructed. If any provided output array doesn't +have the correct type (or is mis-aligned) and is smaller than the +buffer size, then a new output array is constructed with the special +UPDATEIFCOPY flag set so that when it is DECREF'd on completion of the +function, it's contents will be copied back into the output array. +Iterators for the output arguments are then processed. + +Finally, the decision is made about how to execute the looping +mechanism to ensure that all elements of the input arrays are combined +to produce the output arrays of the correct type. The options for loop +execution are one-loop (for contiguous, aligned, and correct data- +type), strided-loop (for non-contiguous but still aligned and correct +data-type), and a buffered loop (for mis-aligned or incorrect data- +type situations). 
Depending on which execution method is called for,
+the loop is then setup and computed.
+
+
+Function call
+-------------
+
+This section describes how the basic universal function computation
+loop is setup and executed for each of the three different kinds of
+execution possibilities. If :cdata:`NPY_ALLOW_THREADS` is defined during
+compilation, then the Python Global Interpreter Lock (GIL) is released
+prior to calling all of these loops (as long as they don't involve
+object arrays). It is re-acquired if necessary to handle error
+conditions. The hardware error flags are checked only after the 1-d
+loop is calculated.
+
+
+One Loop
+^^^^^^^^
+
+This is the simplest case of all. The ufunc is executed by calling the
+underlying 1-d loop exactly once. This is possible only when we have
+aligned data of the correct type (including byte-order) for both input
+and output and all arrays have uniform strides (either contiguous,
+0-d, or 1-d). In this case, the 1-d computational loop is called once
+to compute the calculation for the entire array. Note that the
+hardware error flags are only checked after the entire calculation is
+complete.
+
+
+Strided Loop
+^^^^^^^^^^^^
+
+When the input and output arrays are aligned and of the correct type,
+but the striding is not uniform (non-contiguous and 2-d or larger),
+then a second looping structure is employed for the calculation. This
+approach converts all of the iterators for the input and output
+arguments to iterate over all but the largest dimension. The inner
+loop is then handled by the underlying 1-d computational loop. The
+outer loop is a standard iterator loop on the converted iterators. The
+hardware error flags are checked after each 1-d loop is completed.
+
+
+Buffered Loop
+^^^^^^^^^^^^^
+
+This is the code that handles the situation whenever the input and/or
+output arrays are either misaligned or of the wrong data-type
+(including being byte-swapped) from what the underlying 1-d loop
+expects.
The arrays are also assumed to be non-contiguous. The code +works very much like the strided loop except for the inner 1-d loop is +modified so that pre-processing is performed on the inputs and post- +processing is performed on the outputs in bufsize chunks (where +bufsize is a user-settable parameter). The underlying 1-d +computational loop is called on data that is copied over (if it needs +to be). The setup code and the loop code is considerably more +complicated in this case because it has to handle: + +- memory allocation of the temporary buffers + +- deciding whether or not to use buffers on the input and output data + (mis-aligned and/or wrong data-type) + +- copying and possibly casting data for any inputs or outputs for which + buffers are necessary. + +- special-casing Object arrays so that reference counts are properly + handled when copies and/or casts are necessary. + +- breaking up the inner 1-d loop into bufsize chunks (with a possible + remainder). + +Again, the hardware error flags are checked at the end of each 1-d +loop. + + +Final output manipulation +------------------------- + +Ufuncs allow other array-like classes to be passed seamlessly through +the interface in that inputs of a particular class will induce the +outputs to be of that same class. The mechanism by which this works is +the following. If any of the inputs are not ndarrays and define the +:obj:`__array_wrap__` method, then the class with the largest +:obj:`__array_priority__` attribute determines the type of all the +outputs (with the exception of any output arrays passed in). The +:obj:`__array_wrap__` method of the input array will be called with the +ndarray being returned from the ufunc as it's input. There are two +calling styles of the :obj:`__array_wrap__` function supported. The first +takes the ndarray as the first argument and a tuple of "context" as +the second argument. The context is (ufunc, arguments, output argument +number). This is the first call tried. 
If a TypeError occurs, then the +function is called with just the ndarray as the first argument. + + +Methods +------- + +Their are three methods of ufuncs that require calculation similar to +the general-purpose ufuncs. These are reduce, accumulate, and +reduceat. Each of these methods requires a setup command followed by a +loop. There are four loop styles possible for the methods +corresponding to no-elements, one-element, strided-loop, and buffered- +loop. These are the same basic loop styles as implemented for the +general purpose function call except for the no-element and one- +element cases which are special-cases occurring when the input array +objects have 0 and 1 elements respectively. + + +Setup +^^^^^ + +The setup function for all three methods is ``construct_reduce``. +This function creates a reducing loop object and fills it with +parameters needed to complete the loop. All of the methods only work +on ufuncs that take 2-inputs and return 1 output. Therefore, the +underlying 1-d loop is selected assuming a signature of [ ``otype``, +``otype``, ``otype`` ] where ``otype`` is the requested reduction +data-type. The buffer size and error handling is then retrieved from +(per-thread) global storage. For small arrays that are mis-aligned or +have incorrect data-type, a copy is made so that the un-buffered +section of code is used. Then, the looping strategy is selected. If +there is 1 element or 0 elements in the array, then a simple looping +method is selected. If the array is not mis-aligned and has the +correct data-type, then strided looping is selected. Otherwise, +buffered looping must be performed. Looping parameters are then +established, and the return array is constructed. The output array is +of a different shape depending on whether the method is reduce, +accumulate, or reduceat. If an output array is already provided, then +it's shape is checked. 
If the output array is not C-contiguous, +aligned, and of the correct data type, then a temporary copy is made +with the UPDATEIFCOPY flag set. In this way, the methods will be able +to work with a well-behaved output array but the result will be copied +back into the true output array when the method computation is +complete. Finally, iterators are set up to loop over the correct axis +(depending on the value of axis provided to the method) and the setup +routine returns to the actual computation routine. + + +Reduce +^^^^^^ + +.. index:: + triple: ufunc; methods; reduce + +All of the ufunc methods use the same underlying 1-d computational +loops with input and output arguments adjusted so that the appropriate +reduction takes place. For example, the key to the functioning of +reduce is that the 1-d loop is called with the output and the second +input pointing to the same position in memory and both having a step- +size of 0. The first input is pointing to the input array with a step- +size given by the appropriate stride for the selected axis. In this +way, the operation performed is + +.. math:: + :nowrap: + + \begin{align*} + o & = & i[0] \\ + o & = & i[k]\textrm{<op>}o\quad k=1\ldots N + \end{align*} + +where :math:`N+1` is the number of elements in the input, :math:`i`, +:math:`o` is the output, and :math:`i[k]` is the +:math:`k^{\textrm{th}}` element of :math:`i` along the selected axis. +This basic operations is repeated for arrays with greater than 1 +dimension so that the reduction takes place for every 1-d sub-array +along the selected axis. An iterator with the selected dimension +removed handles this looping. + +For buffered loops, care must be taken to copy and cast data before +the loop function is called because the underlying loop expects +aligned data of the correct data-type (including byte-order). The +buffered loop must handle this copying and casting prior to calling +the loop function on chunks no greater than the user-specified +bufsize. 
+ + +Accumulate +^^^^^^^^^^ + +.. index:: + triple: ufunc; methods; accumulate + +The accumulate function is very similar to the reduce function in that +the output and the second input both point to the output. The +difference is that the second input points to memory one stride behind +the current output pointer. Thus, the operation performed is + +.. math:: + :nowrap: + + \begin{align*} + o[0] & = & i[0] \\ + o[k] & = & i[k]\textrm{<op>}o[k-1]\quad k=1\ldots N. + \end{align*} + +The output has the same shape as the input and each 1-d loop operates +over :math:`N` elements when the shape in the selected axis is :math:`N+1`. Again, buffered loops take care to copy and cast the data before +calling the underlying 1-d computational loop. + + +Reduceat +^^^^^^^^ + +.. index:: + triple: ufunc; methods; reduceat + single: ufunc + +The reduceat function is a generalization of both the reduce and +accumulate functions. It implements a reduce over ranges of the input +array specified by indices. The extra indices argument is checked to +be sure that every input is not too large for the input array along +the selected dimension before the loop calculations take place. The +loop implementation is handled using code that is very similar to the +reduce code repeated as many times as there are elements in the +indices input. In particular: the first input pointer passed to the +underlying 1-d computational loop points to the input array at the +correct location indicated by the index array. In addition, the output +pointer and the second input pointer passed to the underlying 1-d loop +point to the same position in memory. The size of the 1-d +computational loop is fixed to be the difference between the current +index and the next index (when the current index is the last index, +then the next index is assumed to be the length of the array along the +selected dimension). In this way, the 1-d loop will implement a reduce +over the specified indices. 
+ +Mis-aligned or a loop data-type that does not match the input and/or +output data-type is handled using buffered code where-in data is +copied to a temporary buffer and cast to the correct data-type if +necessary prior to calling the underlying 1-d function. The temporary +buffers are created in (element) sizes no bigger than the user +settable buffer-size value. Thus, the loop must be flexible enough to +call the underlying 1-d computational loop enough times to complete +the total calculation in chunks no bigger than the buffer-size. diff --git a/doc/source/reference/internals.rst b/doc/source/reference/internals.rst new file mode 100644 index 000000000..c9716813d --- /dev/null +++ b/doc/source/reference/internals.rst @@ -0,0 +1,9 @@ +*************** +Numpy internals +*************** + +.. toctree:: + + internals.code-explanations + +.. automodule:: numpy.doc.internals diff --git a/doc/source/reference/routines.array-creation.rst b/doc/source/reference/routines.array-creation.rst new file mode 100644 index 000000000..b5385fb86 --- /dev/null +++ b/doc/source/reference/routines.array-creation.rst @@ -0,0 +1,98 @@ +.. _routines.array-creation: + +Array creation routines +======================= + +.. seealso:: :ref:`Array creation <arrays.creation>` + +.. currentmodule:: numpy + +Ones and zeros +-------------- +.. autosummary:: + :toctree: generated/ + + empty + empty_like + eye + identity + ones + ones_like + zeros + zeros_like + +From existing data +------------------ +.. autosummary:: + :toctree: generated/ + + array + asarray + asanyarray + ascontiguousarray + asmatrix + copy + frombuffer + fromfile + fromfunction + fromiter + loadtxt + +.. _routines.array-creation.rec: + +Creating record arrays (:mod:`numpy.rec`) +----------------------------------------- + +.. note:: :mod:`numpy.rec` is the preferred alias for :mod:`numpy.core.records`. + +.. 
autosummary:: + :toctree: generated/ + + core.records.array + core.records.fromarrays + core.records.fromrecords + core.records.fromstring + core.records.fromfile + +.. _routines.array-creation.char: + +Creating character arrays (:mod:`numpy.char`) +--------------------------------------------- + +.. note:: :mod:`numpy.char` is the preferred alias for :mod:`numpy.core.defchararray`. + +.. autosummary:: + :toctree: generated/ + + core.defchararray.array + +Numerical ranges +---------------- +.. autosummary:: + :toctree: generated/ + + arange + linspace + logspace + meshgrid + mgrid + +Building matrices +----------------- +.. autosummary:: + :toctree: generated/ + + diag + diagflat + tri + tril + triu + vander + +The Matrix class +---------------- +.. autosummary:: + :toctree: generated/ + + mat + bmat diff --git a/doc/source/reference/routines.array-manipulation.rst b/doc/source/reference/routines.array-manipulation.rst new file mode 100644 index 000000000..5dedf01d7 --- /dev/null +++ b/doc/source/reference/routines.array-manipulation.rst @@ -0,0 +1,108 @@ +Array manipulation routines +*************************** + +.. currentmodule:: numpy + +.. toctree:: + +Changing array shape +==================== +.. autosummary:: + :toctree: generated/ + + + reshape + ravel + ndarray.flat + ndarray.flatten + +Transpose-like operations +========================= +.. autosummary:: + :toctree: generated/ + + + rollaxis + swapaxes + ndarray.T + transpose + +Changing number of dimensions +============================= +.. autosummary:: + :toctree: generated/ + + + atleast_1d + atleast_2d + atleast_3d + broadcast + broadcast_arrays + expand_dims + squeeze + +Changing kind of array +====================== +.. autosummary:: + :toctree: generated/ + + asarray + asanyarray + asmatrix + asfarray + asfortranarray + asscalar + require + +Joining arrays +============== +.. 
autosummary:: + :toctree: generated/ + + append + column_stack + concatenate + dstack + hstack + vstack + +Splitting arrays +================ +.. autosummary:: + :toctree: generated/ + + array_split + dsplit + hsplit + split + vsplit + +Tiling arrays +============= +.. autosummary:: + :toctree: generated/ + + tile + repeat + +Adding and removing elements +============================ +.. autosummary:: + :toctree: generated/ + + delete + insert + resize + trim_zeros + unique + +Rearranging elements +==================== +.. autosummary:: + :toctree: generated/ + + fliplr + flipud + reshape + roll + rot90 diff --git a/doc/source/reference/routines.bitwise.rst b/doc/source/reference/routines.bitwise.rst new file mode 100644 index 000000000..58661abc7 --- /dev/null +++ b/doc/source/reference/routines.bitwise.rst @@ -0,0 +1,31 @@ +Binary operations +***************** + +.. currentmodule:: numpy + +Elementwise bit operations +-------------------------- +.. autosummary:: + :toctree: generated/ + + bitwise_and + bitwise_or + bitwise_xor + invert + left_shift + right_shift + +Bit packing +----------- +.. autosummary:: + :toctree: generated/ + + packbits + unpackbits + +Output formatting +----------------- +.. autosummary:: + :toctree: generated/ + + binary_repr diff --git a/doc/source/reference/routines.ctypeslib.rst b/doc/source/reference/routines.ctypeslib.rst new file mode 100644 index 000000000..b04713b61 --- /dev/null +++ b/doc/source/reference/routines.ctypeslib.rst @@ -0,0 +1,11 @@ +*********************************************************** +C-Types Foreign Function Interface (:mod:`numpy.ctypeslib`) +*********************************************************** + +.. currentmodule:: numpy.ctypeslib + +.. autofunction:: as_array +.. autofunction:: as_ctypes +.. autofunction:: ctypes_load_library +.. autofunction:: load_library +.. 
autofunction:: ndpointer diff --git a/doc/source/reference/routines.dtype.rst b/doc/source/reference/routines.dtype.rst new file mode 100644 index 000000000..a311f3da5 --- /dev/null +++ b/doc/source/reference/routines.dtype.rst @@ -0,0 +1,52 @@ +.. _routines.dtype: + +Data type routines +================== + +.. currentmodule:: numpy + +.. autosummary:: + :toctree: generated/ + + can_cast + common_type + obj2sctype + +Creating data types +------------------- + +.. autosummary:: + :toctree: generated/ + + + dtype + format_parser + +Data type information +--------------------- +.. autosummary:: + :toctree: generated/ + + finfo + iinfo + MachAr + +Data type testing +----------------- +.. autosummary:: + :toctree: generated/ + + issctype + issubdtype + issubsctype + issubclass_ + find_common_type + +Miscellaneous +------------- +.. autosummary:: + :toctree: generated/ + + typename + sctype2char + mintypecode diff --git a/doc/source/reference/routines.dual.rst b/doc/source/reference/routines.dual.rst new file mode 100644 index 000000000..456fc5c02 --- /dev/null +++ b/doc/source/reference/routines.dual.rst @@ -0,0 +1,48 @@ +Optionally Scipy-accelerated routines (:mod:`numpy.dual`) +********************************************************* + +.. automodule:: numpy.dual + +Linear algebra +-------------- + +.. currentmodule:: numpy.linalg + +.. autosummary:: + + cholesky + det + eig + eigh + eigvals + eigvalsh + inv + lstsq + norm + pinv + solve + svd + +FFT +--- + +.. currentmodule:: numpy.fft + +.. autosummary:: + + fft + fft2 + fftn + ifft + ifft2 + ifftn + +Other +----- + +.. currentmodule:: numpy + +.. 
autosummary:: + + i0 + diff --git a/doc/source/reference/routines.emath.rst b/doc/source/reference/routines.emath.rst new file mode 100644 index 000000000..93c1c6801 --- /dev/null +++ b/doc/source/reference/routines.emath.rst @@ -0,0 +1,10 @@ +Mathematical functions with automatic domain (:mod:`numpy.emath`) +*********************************************************************** + +.. currentmodule:: numpy + +.. note:: :mod:`numpy.emath` is a preferred alias for :mod:`numpy.lib.scimath`, + available after :mod:`numpy` is imported. + +.. automodule:: numpy.lib.scimath + :members: diff --git a/doc/source/reference/routines.err.rst b/doc/source/reference/routines.err.rst new file mode 100644 index 000000000..b3a7164b9 --- /dev/null +++ b/doc/source/reference/routines.err.rst @@ -0,0 +1,25 @@ +Floating point error handling +***************************** + +.. currentmodule:: numpy + +Setting and getting error handling +---------------------------------- + +.. autosummary:: + :toctree: generated/ + + seterr + geterr + seterrcall + geterrcall + errstate + +Internal functions +------------------ + +.. autosummary:: + :toctree: generated/ + + seterrobj + geterrobj diff --git a/doc/source/reference/routines.fft.rst b/doc/source/reference/routines.fft.rst new file mode 100644 index 000000000..e830fcf0d --- /dev/null +++ b/doc/source/reference/routines.fft.rst @@ -0,0 +1,59 @@ +.. _routines.fft: + +Fourier transforms (:mod:`numpy.fft`) +************************************* + +.. currentmodule:: numpy.fft + +1-dimensional +------------- +.. autosummary:: + :toctree: generated/ + + fft + ifft + +2-dimensional +------------- +.. autosummary:: + :toctree: generated/ + + fft2 + ifft2 + +N-dimensional +------------- +.. autosummary:: + :toctree: generated/ + + fftn + ifftn + +Hermite symmetric +----------------- +.. autosummary:: + :toctree: generated/ + + hfft + ihfft + +Real-valued +----------- +.. 
autosummary:: + :toctree: generated/ + + rfft + irfft + rfft2 + irfft2 + rfftn + irfftn + +Helper routines +--------------- +.. autosummary:: + :toctree: generated/ + + fftfreq + fftshift + ifftshift diff --git a/doc/source/reference/routines.financial.rst b/doc/source/reference/routines.financial.rst new file mode 100644 index 000000000..5f426d7ab --- /dev/null +++ b/doc/source/reference/routines.financial.rst @@ -0,0 +1,21 @@ +Financial functions +******************* + +.. currentmodule:: numpy + +Simple financial functions +-------------------------- + +.. autosummary:: + :toctree: generated/ + + fv + pv + npv + pmt + ppmt + ipmt + irr + mirr + nper + rate diff --git a/doc/source/reference/routines.functional.rst b/doc/source/reference/routines.functional.rst new file mode 100644 index 000000000..e4aababdd --- /dev/null +++ b/doc/source/reference/routines.functional.rst @@ -0,0 +1,13 @@ +Functional programming +********************** + +.. currentmodule:: numpy + +.. autosummary:: + :toctree: generated/ + + apply_along_axis + apply_over_axes + vectorize + frompyfunc + piecewise diff --git a/doc/source/reference/routines.help.rst b/doc/source/reference/routines.help.rst new file mode 100644 index 000000000..a41563cce --- /dev/null +++ b/doc/source/reference/routines.help.rst @@ -0,0 +1,24 @@ +.. _routines.help: + +Numpy-specific help functions +============================= + +.. currentmodule:: numpy + +Finding help +------------ + +.. autosummary:: + :toctree: generated/ + + lookfor + + +Reading help +------------ + +.. autosummary:: + :toctree: generated/ + + info + source diff --git a/doc/source/reference/routines.indexing.rst b/doc/source/reference/routines.indexing.rst new file mode 100644 index 000000000..f618fa0a4 --- /dev/null +++ b/doc/source/reference/routines.indexing.rst @@ -0,0 +1,53 @@ +.. _routines.indexing: + +Indexing routines +================= + +.. seealso:: :ref:`Indexing <arrays.indexing>` + +.. 
currentmodule:: numpy + +Generating index arrays +----------------------- +.. autosummary:: + :toctree: generated/ + + c_ + r_ + s_ + nonzero + where + indices + ix_ + ogrid + unravel_index + +Indexing-like operations +------------------------ +.. autosummary:: + :toctree: generated/ + + take + choose + compress + diag + diagonal + select + +Inserting data into arrays +-------------------------- +.. autosummary:: + :toctree: generated/ + + place + put + putmask + +Iterating over arrays +--------------------- +.. autosummary:: + :toctree: generated/ + + ndenumerate + ndindex + flatiter diff --git a/doc/source/reference/routines.io.rst b/doc/source/reference/routines.io.rst new file mode 100644 index 000000000..5da0bebf2 --- /dev/null +++ b/doc/source/reference/routines.io.rst @@ -0,0 +1,64 @@ +Input and output +**************** + +.. currentmodule:: numpy + +NPZ files +--------- +.. autosummary:: + :toctree: generated/ + + load + save + savez + +Text files +---------- +.. autosummary:: + :toctree: generated/ + + loadtxt + savetxt + fromregex + fromstring + ndarray.tofile + ndarray.tolist + +String formatting +----------------- +.. autosummary:: + :toctree: generated/ + + array_repr + array_str + +Memory mapping files +-------------------- +.. autosummary:: + :toctree: generated/ + + memmap + +Text formatting options +----------------------- +.. autosummary:: + :toctree: generated/ + + set_printoptions + get_printoptions + set_string_function + +Base-n representations +---------------------- +.. autosummary:: + :toctree: generated/ + + binary_repr + base_repr + +Data sources +------------ +.. autosummary:: + :toctree: generated/ + + DataSource diff --git a/doc/source/reference/routines.linalg.rst b/doc/source/reference/routines.linalg.rst new file mode 100644 index 000000000..595b02744 --- /dev/null +++ b/doc/source/reference/routines.linalg.rst @@ -0,0 +1,67 @@ +.. _routines.linalg: + +Linear algebra (:mod:`numpy.linalg`) +************************************ + +.. 
currentmodule:: numpy + +Matrix and vector products +-------------------------- +.. autosummary:: + :toctree: generated/ + + dot + vdot + inner + outer + tensordot + linalg.matrix_power + kron + +Decompositions +-------------- +.. autosummary:: + :toctree: generated/ + + linalg.cholesky + linalg.qr + linalg.svd + +Matrix eigenvalues +------------------ +.. autosummary:: + :toctree: generated/ + + linalg.eig + linalg.eigh + linalg.eigvals + linalg.eigvalsh + +Norms and other numbers +----------------------- +.. autosummary:: + :toctree: generated/ + + linalg.norm + linalg.cond + linalg.det + trace + +Solving equations and inverting matrices +---------------------------------------- +.. autosummary:: + :toctree: generated/ + + linalg.solve + linalg.tensorsolve + linalg.lstsq + linalg.inv + linalg.pinv + linalg.tensorinv + +Exceptions +---------- +.. autosummary:: + :toctree: generated/ + + linalg.LinAlgError diff --git a/doc/source/reference/routines.logic.rst b/doc/source/reference/routines.logic.rst new file mode 100644 index 000000000..56e36f49a --- /dev/null +++ b/doc/source/reference/routines.logic.rst @@ -0,0 +1,64 @@ +Logic functions +*************** + +.. currentmodule:: numpy + +Truth value testing +------------------- +.. autosummary:: + :toctree: generated/ + + all + any + +Array contents +-------------- +.. autosummary:: + :toctree: generated/ + + isfinite + isinf + isnan + isneginf + isposinf + +Array type testing +------------------ +.. autosummary:: + :toctree: generated/ + + iscomplex + iscomplexobj + isfortran + isreal + isrealobj + isscalar + +Logical operations +------------------ +.. autosummary:: + :toctree: generated/ + + logical_and + logical_or + logical_not + logical_xor + +Comparison +---------- +.. autosummary:: + :toctree: generated/ + + allclose + array_equal + array_equiv + +.. 
autosummary:: + :toctree: generated/ + + greater + greater_equal + less + less_equal + equal + not_equal diff --git a/doc/source/reference/routines.ma.rst b/doc/source/reference/routines.ma.rst new file mode 100644 index 000000000..425abf112 --- /dev/null +++ b/doc/source/reference/routines.ma.rst @@ -0,0 +1,52 @@ +.. _routines.ma: + +Masked array operations +*********************** + +.. currentmodule:: numpy + +Creation +-------- + +.. autosummary:: + :toctree: generated/ + + ma.masked_array + +Converting to ndarray +--------------------- + +.. autosummary:: + :toctree: generated/ + + ma.filled + ma.common_fill_value + ma.default_fill_value + ma.masked_array.get_fill_value + ma.maximum_fill_value + ma.minimum_fill_value + +Inspecting the array +-------------------- + +.. autosummary:: + :toctree: generated/ + + ma.getmask + ma.getmaskarray + ma.getdata + ma.count_masked + +Modifying the mask +------------------ + +.. autosummary:: + :toctree: generated/ + + ma.make_mask + ma.mask_cols + ma.mask_or + ma.mask_rowcols + ma.mask_rows + ma.harden_mask + ma.ids diff --git a/doc/source/reference/routines.math.rst b/doc/source/reference/routines.math.rst new file mode 100644 index 000000000..2ae1762c6 --- /dev/null +++ b/doc/source/reference/routines.math.rst @@ -0,0 +1,143 @@ +Mathematical functions +********************** + +.. currentmodule:: numpy + +Trigonometric functions +----------------------- +.. autosummary:: + :toctree: generated/ + + sin + cos + tan + arcsin + arccos + arctan + hypot + arctan2 + degrees + radians + unwrap + +Hyperbolic functions +-------------------- +.. autosummary:: + :toctree: generated/ + + sinh + cosh + tanh + arcsinh + arccosh + arctanh + +Rounding +-------- +.. autosummary:: + :toctree: generated/ + + around + round_ + rint + fix + floor + ceil + +Sums, products, differences +--------------------------- +.. 
autosummary:: + :toctree: generated/ + + prod + sum + nansum + cumprod + cumsum + diff + ediff1d + gradient + cross + trapz + +Exponents and logarithms +------------------------ +.. autosummary:: + :toctree: generated/ + + exp + expm1 + log + log10 + log2 + log1p + +Other special functions +----------------------- +.. autosummary:: + :toctree: generated/ + + i0 + sinc + +Floating point routines +----------------------- +.. autosummary:: + :toctree: generated/ + + signbit + frexp + ldexp + +Arithmetic operations +--------------------- +.. autosummary:: + :toctree: generated/ + + add + reciprocal + negative + multiply + divide + power + subtract + true_divide + floor_divide + + fmod + mod + modf + remainder + +Handling complex numbers +------------------------ +.. autosummary:: + :toctree: generated/ + + angle + real + imag + conj + + +Miscellaneous +------------- +.. autosummary:: + :toctree: generated/ + + convolve + clip + + sqrt + square + + absolute + fabs + sign + maximum + minimum + + nan_to_num + real_if_close + + interp diff --git a/doc/source/reference/routines.matlib.rst b/doc/source/reference/routines.matlib.rst new file mode 100644 index 000000000..25e381f26 --- /dev/null +++ b/doc/source/reference/routines.matlib.rst @@ -0,0 +1,11 @@ +Matrix library (:mod:`numpy.matlib`) +************************************ + +.. currentmodule:: numpy + +This module contains all functions in the :mod:`numpy` namespace, with +the following replacement functions that return :class:`matrices +<matrix>` instead of :class:`ndarrays <ndarray>`. + +.. automodule:: numpy.matlib + :members: diff --git a/doc/source/reference/routines.numarray.rst b/doc/source/reference/routines.numarray.rst new file mode 100644 index 000000000..8d47cc989 --- /dev/null +++ b/doc/source/reference/routines.numarray.rst @@ -0,0 +1,6 @@ +********************************************** +Numarray compatibility (:mod:`numpy.numarray`) +********************************************** + +.. 
automodule:: numpy.numarray + :members: diff --git a/doc/source/reference/routines.oldnumeric.rst b/doc/source/reference/routines.oldnumeric.rst new file mode 100644 index 000000000..9c02b4070 --- /dev/null +++ b/doc/source/reference/routines.oldnumeric.rst @@ -0,0 +1,8 @@ +*************************************************** +Old Numeric compatibility (:mod:`numpy.oldnumeric`) +*************************************************** + +.. currentmodule:: numpy + +.. automodule:: numpy.oldnumeric + :members: diff --git a/doc/source/reference/routines.other.rst b/doc/source/reference/routines.other.rst new file mode 100644 index 000000000..354f45733 --- /dev/null +++ b/doc/source/reference/routines.other.rst @@ -0,0 +1,24 @@ +Miscellaneous routines +********************** + +.. toctree:: + +.. currentmodule:: numpy + +Buffer objects +-------------- +.. autosummary:: + :toctree: generated/ + + getbuffer + newbuffer + +Performance tuning +------------------ +.. autosummary:: + :toctree: generated/ + + alterdot + restoredot + setbufsize + getbufsize diff --git a/doc/source/reference/routines.poly.rst b/doc/source/reference/routines.poly.rst new file mode 100644 index 000000000..f30b2c884 --- /dev/null +++ b/doc/source/reference/routines.poly.rst @@ -0,0 +1,46 @@ +Polynomials +*********** + +.. currentmodule:: numpy + +Basics +------ +.. autosummary:: + :toctree: generated/ + + poly1d + polyval + poly + roots + +Fitting +------- +.. autosummary:: + :toctree: generated/ + + polyfit + +Calculus +-------- +.. autosummary:: + :toctree: generated/ + + polyder + polyint + +Arithmetic +---------- +.. autosummary:: + :toctree: generated/ + + polyadd + polydiv + polymul + polysub + +Warnings +-------- +.. autosummary:: + :toctree: generated/ + + RankWarning diff --git a/doc/source/reference/routines.random.rst b/doc/source/reference/routines.random.rst new file mode 100644 index 000000000..508c2c96e --- /dev/null +++ b/doc/source/reference/routines.random.rst @@ -0,0 +1,77 @@ +.. 
_routines.random: + +Random sampling (:mod:`numpy.random`) +************************************* + +.. currentmodule:: numpy.random + +Simple random data +================== +.. autosummary:: + :toctree: generated/ + + rand + randn + randint + random_integers + random_sample + bytes + +Permutations +============ +.. autosummary:: + :toctree: generated/ + + shuffle + permutation + +Distributions +============= +.. autosummary:: + :toctree: generated/ + + beta + binomial + chisquare + mtrand.dirichlet + exponential + f + gamma + geometric + gumbel + hypergeometric + laplace + logistic + lognormal + logseries + multinomial + multivariate_normal + negative_binomial + noncentral_chisquare + noncentral_f + normal + pareto + poisson + power + rayleigh + standard_cauchy + standard_exponential + standard_gamma + standard_normal + standard_t + triangular + uniform + vonmises + wald + weibull + zipf + +Random generator +================ +.. autosummary:: + :toctree: generated/ + + mtrand.RandomState + seed + get_state + set_state diff --git a/doc/source/reference/routines.rst b/doc/source/reference/routines.rst new file mode 100644 index 000000000..e682f5a57 --- /dev/null +++ b/doc/source/reference/routines.rst @@ -0,0 +1,35 @@ +******** +Routines +******** + +.. 
toctree:: + :maxdepth: 2 + + routines.array-creation + routines.array-manipulation + routines.indexing + routines.dtype + routines.io + routines.fft + routines.linalg + routines.random + routines.sort + routines.logic + routines.bitwise + routines.statistics + routines.math + routines.functional + routines.poly + routines.financial + routines.set + routines.window + routines.err + routines.ma + routines.help + routines.other + routines.emath + routines.matlib + routines.dual + routines.numarray + routines.oldnumeric + routines.ctypeslib diff --git a/doc/source/reference/routines.set.rst b/doc/source/reference/routines.set.rst new file mode 100644 index 000000000..4c298e80f --- /dev/null +++ b/doc/source/reference/routines.set.rst @@ -0,0 +1,23 @@ +Set routines +============ + +.. currentmodule:: numpy + +Making proper sets +------------------ +.. autosummary:: + :toctree: generated/ + + unique1d + +Boolean operations +------------------ +.. autosummary:: + :toctree: generated/ + + intersect1d + intersect1d_nu + setdiff1d + setmember1d + setxor1d + union1d diff --git a/doc/source/reference/routines.sort.rst b/doc/source/reference/routines.sort.rst new file mode 100644 index 000000000..8dc769ea9 --- /dev/null +++ b/doc/source/reference/routines.sort.rst @@ -0,0 +1,32 @@ +Sorting and searching +===================== + +.. currentmodule:: numpy + +Sorting +------- +.. autosummary:: + :toctree: generated/ + + sort + lexsort + argsort + ndarray.sort + msort + sort_complex + +Searching +--------- +.. autosummary:: + :toctree: generated/ + + argmax + nanargmax + argmin + nanargmin + argwhere + nonzero + flatnonzero + where + searchsorted + extract diff --git a/doc/source/reference/routines.statistics.rst b/doc/source/reference/routines.statistics.rst new file mode 100644 index 000000000..89009e210 --- /dev/null +++ b/doc/source/reference/routines.statistics.rst @@ -0,0 +1,51 @@ +Statistics +========== + +.. currentmodule:: numpy + + +Extremal values +--------------- + +.. 
autosummary:: + :toctree: generated/ + + amin + amax + nanmax + nanmin + ptp + +Averages and variances +---------------------- + +.. autosummary:: + :toctree: generated/ + + average + mean + median + std + var + +Correlating +----------- + +.. autosummary:: + :toctree: generated/ + + corrcoef + correlate + cov + +Histograms +---------- + +.. autosummary:: + :toctree: generated/ + + histogram + histogram2d + histogramdd + bincount + digitize diff --git a/doc/source/reference/routines.window.rst b/doc/source/reference/routines.window.rst new file mode 100644 index 000000000..7f3414815 --- /dev/null +++ b/doc/source/reference/routines.window.rst @@ -0,0 +1,16 @@ +Window functions +================ + +.. currentmodule:: numpy + +Various windows +--------------- + +.. autosummary:: + :toctree: generated/ + + bartlett + blackman + hamming + hanning + kaiser diff --git a/doc/source/reference/ufuncs.rst b/doc/source/reference/ufuncs.rst new file mode 100644 index 000000000..12f952801 --- /dev/null +++ b/doc/source/reference/ufuncs.rst @@ -0,0 +1,555 @@ +.. sectionauthor:: adapted from "Guide to Numpy" by Travis E. Oliphant + +.. _ufuncs: + +************************************ +Universal functions (:class:`ufunc`) +************************************ + +.. note: XXX: section might need to be made more reference-guideish... + +.. currentmodule:: numpy + +.. index: ufunc, universal function, arithmetic, operation + +A universal function (or :term:`ufunc` for short) is a function that +operates on :class:`ndarrays <ndarray>` in an element-by-element fashion, +supporting :ref:`array broadcasting <ufuncs.broadcasting>`, :ref:`type +casting <ufuncs.casting>`, and several other standard features. That +is, a ufunc is a ":term:`vectorized`" wrapper for a function that +takes a fixed number of scalar inputs and produces a fixed number of +scalar outputs. + +In Numpy, universal functions are instances of the +:class:`numpy.ufunc` class. 
Many of the built-in functions are +implemented in compiled C code, but :class:`ufunc` instances can also +be produced using the :func:`frompyfunc` factory function. + + +.. _ufuncs.broadcasting: + +Broadcasting +============ + +.. index:: broadcasting + +Each universal function takes array inputs and produces array outputs +by performing the core function element-wise on the inputs. Standard +broadcasting rules are applied so that inputs not sharing exactly the +same shapes can still be usefully operated on. Broadcasting can be +understood by four rules: + +1. All input arrays with :attr:`ndim <ndarray.ndim>` smaller than the + input array of largest :attr:`ndim <ndarray.ndim>` have 1's + prepended to their shapes. + +2. The size in each dimension of the output shape is the maximum of all + the input shapes in that dimension. + +3. An input can be used in the calculation if it's shape in a particular + dimension either matches the output shape or has value exactly 1. + +4. If an input has a dimension size of 1 in its shape, the first data + entry in that dimension will be used for all calculations along + that dimension. In other words, the stepping machinery of the + :term:`ufunc` will simply not step along that dimension when + otherwise needed (the :term:`stride` will be 0 for that dimension). + +Broadcasting is used throughout NumPy to decide how to handle non +equally-shaped arrays; for example all arithmetic operators (``+``, +``-``, ``*``, ...) between :class:`ndarrays <ndarray>` broadcast the +arrays before operation. + +.. _arrays.broadcasting.broadcastable: + +.. index:: broadcastable + +A set of arrays is called ":term:`broadcastable`" to the same shape if +the above rules produce a valid result, *i.e.*, one of the following +is true: + +1. The arrays all have exactly the same shape. + +2. The arrays all have the same number of dimensions and the length of + each dimensions is either a common length or 1. + +3. 
The arrays that have too few dimensions can have their shapes prepended + with a dimension of length 1 to satisfy property 2. + +.. admonition:: Example + + If ``a.shape`` is (5,1), ``b.shape`` is (1,6), ``c.shape`` is (6,) + and d.shape is ``()`` so that d is a scalar, then *a*, *b*, *c*, + and *d* are all broadcastable to dimension (5,6); and + + - *a* acts like a (5,6) array where ``a[:,0]`` is broadcast to the other + columns, + + - *b* acts like a (5,6) array where ``b[0,:]`` is broadcast + to the other rows, + + - *c* acts like a (1,6) array and therefore like a (5,6) array + where ``c[:]` is broadcast to every row, and finally, + + - *d* acts like a (5,6) array where the single value is repeated. + + +.. _ufuncs.output-type: + +Output type determination +========================= + +The output of the ufunc (and its methods) is not necessarily an +:class:`ndarray`, if all input arguments are not :class:`ndarrays <ndarray>`. + +All output arrays will be passed to the :obj:`__array_wrap__` +method of the input (besides :class:`ndarrays <ndarray>`, and scalars) +that defines it **and** has the highest :obj:`__array_priority__` of +any other input to the universal function. The default +:obj:`__array_priority__` of the ndarray is 0.0, and the default +:obj:`__array_priority__` of a subtype is 1.0. Matrices have +:obj:`__array_priority__` equal to 10.0. + +The ufuncs can also all take output arguments. The output will be cast +if necessary to the provided output array. If a class with an +:obj:`__array__` method is used for the output, results will be +written to the object returned by :obj:`__array__`. Then, if the class +also has an :obj:`__array_wrap__` method, the returned +:class:`ndarray` result will be passed to that method just before +passing control back to the caller. + +Use of internal buffers +======================= + +.. 
index:: buffers + +Internally, buffers are used for misaligned data, swapped data, and +data that has to be converted from one data type to another. The size +of the internal buffers is settable on a per-thread basis. There can +be up to :math:`2 (n_{\mathrm{inputs}} + n_{\mathrm{outputs}})` +buffers of the specified size created to handle the data from all the +inputs and outputs of a ufunc. The default size of the buffer is +10,000 elements. Whenever buffer-based calculation would be needed, +but all input arrays are smaller than the buffer size, those +misbehaved or incorrect typed arrays will be copied before the +calculation proceeds. Adjusting the size of the buffer may therefore +alter the speed at which ufunc calculations of various sorts are +completed. A simple interface for setting this variable is accessible +using the function + +.. autosummary:: + :toctree: generated/ + + setbufsize + + +Error handling +============== + +.. index:: error handling + +Universal functions can trip special floating point status registers +in your hardware (such as divide-by-zero). If available on your +platform, these registers will be regularly checked during +calculation. Error handling is controlled on a per-thread basis, +and can be configured using the functions + +.. autosummary:: + :toctree: generated/ + + seterr + seterrcall + +.. _ufuncs.casting: + +Casting Rules +============= + +.. index:: + pair: ufunc; casting rules + +At the core of every ufunc is a one-dimensional strided loop that +implements the actual function for a specific type combination. When a +ufunc is created, it is given a static list of inner loops and a +corresponding list of type signatures over which the ufunc operates. +The ufunc machinery uses this list to determine which inner loop to +use for a particular case. 
You can inspect the :attr:`.types +<ufunc.types>` attribute for a particular ufunc to see which type +combinations have a defined inner loop and which output type they +produce (:ref:`character codes <arrays.scalars.character-codes>` are used in +that output for brevity). + +Casting must be done on one or more of the inputs whenever the ufunc +does not have a core loop implementation for the input types provided. +If an implementation for the input types cannot be found, then the +algorithm searches for an implementation with a type signature to +which all of the inputs can be cast "safely." The first one it finds +in its internal list of loops is selected and performed with types +cast. Recall that internal copies during ufuncs (even for casting) are +limited to the size of an internal buffer which is user settable. + +.. note:: + + Universal functions in NumPy are flexible enough to have mixed type + signatures. Thus, for example, a universal function could be defined + that works with floating point and integer values. See :func:`ldexp` + for an example. + +By the above description, the casting rules are essentially +implemented by the question of when a data type can be cast "safely" +to another data type. The answer to this question can be determined in +Python with a function call: :func:`can_cast(fromtype, totype) +<can_cast>`. Figure shows the results of this call for my 32-bit +system on the 21 internally supported types. You can generate this +table for your system with code shown in that Figure. + +.. admonition:: Figure + + Code segment showing the can cast safely table for a 32-bit system. + + >>> def print_table(ntypes): + ... print 'X', + ... for char in ntypes: print char, + ... print + ... for row in ntypes: + ... print row, + ... for col in ntypes: + ... print int(np.can_cast(row, col)), + ... print + >>> print_table(np.typecodes['All']) + X ? b h i l q p B H I L Q P f d g F D G S U V O + ? 
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 + b 0 1 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 + h 0 0 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 + i 0 0 0 1 1 1 1 0 0 0 0 0 0 0 1 1 0 1 1 1 1 1 1 + l 0 0 0 1 1 1 1 0 0 0 0 0 0 0 1 1 0 1 1 1 1 1 1 + q 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 1 1 1 1 1 + p 0 0 0 1 1 1 1 0 0 0 0 0 0 0 1 1 0 1 1 1 1 1 1 + B 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 + H 0 0 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 + I 0 0 0 0 0 1 0 0 0 1 1 1 1 0 1 1 0 1 1 1 1 1 1 + L 0 0 0 0 0 1 0 0 0 1 1 1 1 0 1 1 0 1 1 1 1 1 1 + Q 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 1 1 1 1 1 1 + P 0 0 0 0 0 1 0 0 0 1 1 1 1 0 1 1 0 1 1 1 1 1 1 + f 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 + d 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 1 1 1 1 + g 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 1 1 + F 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 + D 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 + G 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 + S 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 + U 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 + V 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 + O 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 + +You should note that, while included in the table for completeness, +the 'S', 'U', and 'V' types cannot be operated on by ufuncs. Also, +note that on a 64-bit system the integer types may have different +sizes resulting in a slightly altered table. + +Mixed scalar-array operations use a different set of casting rules +that ensure that a scalar cannot upcast an array unless the scalar is +of a fundamentally different kind of data (*i.e.* under a different +hierachy in the data type hierarchy) than the array. This rule +enables you to use scalar constants in your code (which as Python +types are interpreted accordingly in ufuncs) without worrying about +whether the precision of the scalar constant will cause upcasting on +your large (small precision) array. 
+ + +:class:`ufunc` +============== + +Optional keyword arguments +-------------------------- + +All ufuncs take optional keyword arguments. These represent rather +advanced usage and will likely not be used by most users. + +.. index:: + pair: ufunc; keyword arguments + +*sig* + + Either a data-type, a tuple of data-types, or a special signature + string indicating the input and output types of a ufunc. This argument + allows you to specify a specific signature for a the 1-d loop to use + in the underlying calculation. If the loop specified does not exist + for the ufunc, then a TypeError is raised. Normally a suitable loop is + found automatically by comparing the input types with what is + available and searching for a loop with data-types to which all inputs + can be cast safely. This key-word argument lets you by-pass that + search and choose a loop you want. A list of available signatures is + available in the **types** attribute of the ufunc object. + +*extobj* + + a list of length 1, 2, or 3 specifying the ufunc buffer-size, the + error mode integer, and the error call-back function. Normally, these + values are looked-up in a thread-specific dictionary. Passing them + here bypasses that look-up and uses the low-level specification + provided for the error-mode. This may be useful as an optimization for + calculations requiring lots of ufuncs on small arrays in a loop. + + +Attributes +---------- + +There are some informational attributes that universal functions +possess. None of the attributes can be set. + +.. index:: + pair: ufunc; attributes + + +============ ================================================================= +**__doc__** A docstring for each ufunc. The first part of the docstring is + dynamically generated from the number of outputs, the name, and + the number of inputs. The second part of the doc string is + provided at creation time and stored with the ufunc. + +**__name__** The name of the ufunc. 
+============ ================================================================= + +.. autosummary:: + :toctree: generated/ + + ufunc.nin + ufunc.nout + ufunc.nargs + ufunc.ntypes + ufunc.types + ufunc.identity + +Methods +------- + +All ufuncs have 4 methods. However, these methods only make sense on +ufuncs that take two input arguments and return one output argument. +Attempting to call these methods on other ufuncs will cause a +:exc:`ValueError` . The reduce-like methods all take an *axis* keyword +and a *dtype* keyword, and the arrays must all have dimension >= +1. The *axis* keyword specifies which axis of the array the reduction +will take place over and may be negative, but must be an integer. The +*dtype* keyword allows you to manage a very common problem that arises +when naively using `{op}.reduce <ufunc.reduce>`. Sometimes you may +have an array of a certain data type and wish to add up all of its +elements, but the result does not fit into the data type of the +array. This commonly happens if you have an array of single-byte +integers. The *dtype* keyword allows you to alter the data type that the +reduction takes place over (and therefore the type of the +output). Thus, you can ensure that the output is a data type with +large-enough precision to handle your output. The responsibility of +altering the reduce type is mostly up to you. There is one exception: +if no *dtype* is given for a reduction on the "add" or "multiply" +operations, then if the input type is an integer (or boolean) data- +type and smaller than the size of the :class:`int_` data type, it will +be internally upcast to the :class:`int_` (or :class:`uint`) data +type. + +.. index:: + pair: ufunc; methods + +.. autosummary:: + :toctree: generated/ + + ufunc.reduce + ufunc.accumulate + ufunc.reduceat + ufunc.outer + + +.. warning:: + + A reduce-like operation on an array with a data type that has + range "too small "to handle the result will silently wrap. 
You + should use dtype to increase the data type over which reduction + takes place. + + +Available ufuncs +================ + +There are currently more than 60 universal functions defined in +:mod:`numpy` on one or more types, covering a wide variety of +operations. Some of these ufuncs are called automatically on arrays +when the relevant infix notation is used (*e.g.* :func:`add(a, b) <add>` +is called internally when ``a + b`` is written and *a* or *b* is an +:class:`ndarray`). Nonetheless, you may still want to use the ufunc +call in order to use the optional output argument(s) to place the +output(s) in an object (or in objects) of your choice. + +Recall that each ufunc operates element-by-element. Therefore, each +ufunc will be described as if acting on a set of scalar inputs to +return a set of scalar outputs. + +.. note:: + + The ufunc still returns its output(s) even if you use the optional + output argument(s). + +Math operations +--------------- + +.. autosummary:: + + add + subtract + multiply + divide + logaddexp + true_divide + floor_divide + negative + power + remainder + mod + fmod + absolute + rint + sign + conj + exp + log + expm1 + log1p + log10 + sqrt + square + reciprocal + ones_like + +.. tip:: + + The optional output arguments can be used to help you save memory + for large calculations. If your arrays are large, complicated + expressions can take longer than absolutely necessary due to the + creation and (later) destruction of temporary calculation + spaces. For example, the expression ``G=a*b+c`` is equivalent to + ``t1=A*B; G=T1+C; del t1``. It will be more quickly executed as + ``G=A*B; add(G,C,G)`` which is the same as ``G=A*B; G+=C``. + + +Trigonometric functions +----------------------- +All trigonometric functions use radians when an angle is called for. +The ratio of degrees to radians is :math:`180^{\circ}/\pi.` + +.. 
autosummary:: + + sin + cos + tan + arcsin + arccos + arctan + arctan2 + hypot + sinh + cosh + tanh + arcsinh + arccosh + arctanh + deg2rad + rad2deg + +Bit-twiddling functions +----------------------- + +These function all need integer arguments and they maniuplate the bit- +pattern of those arguments. + +.. autosummary:: + + bitwise_and + bitwise_or + bitwise_xor + invert + left_shift + right_shift + +Comparison functions +-------------------- + +.. autosummary:: + + greater + greater_equal + less + less_equal + not_equal + equal + +.. warning:: + + Do not use the Python keywords ``and`` and ``or`` to combine + logical array expressions. These keywords will test the truth + value of the entire array (not element-by-element as you might + expect). Use the bitwise operators: & and \| instead. + +.. autosummary:: + + logical_and + logical_or + logical_xor + logical_not + +.. warning:: + + The Bitwise operators (& and \|) are the proper way to combine + element-by-element array comparisons. Be sure to understand the + operator precedence: (a>2) & (a<5) is the proper syntax because a>2 & + a<5 will result in an error due to the fact that 2 & a is evaluated + first. + +.. autosummary:: + + maximum + +.. tip:: + + The Python function max() will find the maximum over a one-dimensional + array, but it will do so using a slower sequence interface. The reduce + method of the maximum ufunc is much faster. Also, the max() method + will not give answers you might expect for arrays with greater than + one dimension. The reduce method of minimum also allows you to compute + a total minimum over an array. + +.. autosummary:: + + minimum + +.. warning:: + + the behavior of maximum(a,b) is than that of max(a,b). As a ufunc, + maximum(a,b) performs an element-by-element comparison of a and b and + chooses each element of the result according to which element in the + two arrays is larger. 
In contrast, max(a,b) treats the objects a and b + as a whole, looks at the (total) truth value of a>b and uses it to + return either a or b (as a whole). A similar difference exists between + minimum(a,b) and min(a,b). + + +Floating functions +------------------ + +Recall that all of these functions work element-by-element over an +array, returning an array output. The description details only a +single operation. + +.. autosummary:: + + isreal + iscomplex + isfinite + isinf + isnan + signbit + modf + ldexp + frexp + fmod + floor + ceil + trunc diff --git a/doc/source/scipyshiny_small.png b/doc/source/scipyshiny_small.png Binary files differnew file mode 100644 index 000000000..7ef81a9e8 --- /dev/null +++ b/doc/source/scipyshiny_small.png diff --git a/doc/source/user/basics.broadcasting.rst b/doc/source/user/basics.broadcasting.rst new file mode 100644 index 000000000..65584b1fd --- /dev/null +++ b/doc/source/user/basics.broadcasting.rst @@ -0,0 +1,7 @@ +************ +Broadcasting +************ + +.. seealso:: :class:`numpy.broadcast` + +.. automodule:: numpy.doc.broadcasting diff --git a/doc/source/user/basics.creation.rst b/doc/source/user/basics.creation.rst new file mode 100644 index 000000000..b3fa81017 --- /dev/null +++ b/doc/source/user/basics.creation.rst @@ -0,0 +1,9 @@ +.. _arrays.creation: + +************** +Array creation +************** + +.. seealso:: :ref:`Array creation routines <routines.array-creation>` + +.. automodule:: numpy.doc.creation diff --git a/doc/source/user/basics.indexing.rst b/doc/source/user/basics.indexing.rst new file mode 100644 index 000000000..7427874a5 --- /dev/null +++ b/doc/source/user/basics.indexing.rst @@ -0,0 +1,16 @@ +.. _basics.indexing: + +******** +Indexing +******** + +.. seealso:: :ref:`Indexing routines <routines.indexing>` + +.. note:: + + XXX: Combine ``numpy.doc.indexing`` with material + section 2.2 Basic indexing? + Or incorporate the material directly here? + + +.. 
automodule:: numpy.doc.indexing diff --git a/doc/source/user/basics.rec.rst b/doc/source/user/basics.rec.rst new file mode 100644 index 000000000..81a3de8e3 --- /dev/null +++ b/doc/source/user/basics.rec.rst @@ -0,0 +1,5 @@ +*************************************** +Structured arrays (aka "Record arrays") +*************************************** + +.. automodule:: numpy.doc.structured_arrays diff --git a/doc/source/user/basics.rst b/doc/source/user/basics.rst new file mode 100644 index 000000000..b31f38ae9 --- /dev/null +++ b/doc/source/user/basics.rst @@ -0,0 +1,17 @@ +************ +Numpy basics +************ + +.. note:: + + XXX: there is overlap between this text extracted from ``numpy.doc`` + and "Guide to Numpy" chapter 2. Needs combining? + +.. toctree:: + + basics.types + basics.creation + basics.indexing + basics.broadcasting + basics.rec + basics.subclassing diff --git a/doc/source/user/basics.subclassing.rst b/doc/source/user/basics.subclassing.rst new file mode 100644 index 000000000..43315521c --- /dev/null +++ b/doc/source/user/basics.subclassing.rst @@ -0,0 +1,7 @@ +.. _basics.subclassing: + +******************* +Subclassing ndarray +******************* + +.. automodule:: numpy.doc.subclassing diff --git a/doc/source/user/basics.types.rst b/doc/source/user/basics.types.rst new file mode 100644 index 000000000..1a95dc6b4 --- /dev/null +++ b/doc/source/user/basics.types.rst @@ -0,0 +1,14 @@ +********** +Data types +********** + +.. seealso:: :ref:`Data type objects <arrays.dtypes>` + +.. note:: + + XXX: Combine ``numpy.doc.indexing`` with material from + "Guide to Numpy" (section 2.1 Data-Type descriptors)? + Or incorporate the material directly here? + + +.. 
automodule:: numpy.doc.basics diff --git a/doc/source/user/c-info.beyond-basics.rst b/doc/source/user/c-info.beyond-basics.rst new file mode 100644 index 000000000..905ab67eb --- /dev/null +++ b/doc/source/user/c-info.beyond-basics.rst @@ -0,0 +1,734 @@ +***************** +Beyond the Basics +***************** + +| The voyage of discovery is not in seeking new landscapes but in having +| new eyes. +| --- *Marcel Proust* + +| Discovery is seeing what everyone else has seen and thinking what no +| one else has thought. +| --- *Albert Szent-Gyorgi* + + +Iterating over elements in the array +==================================== + +.. _`sec:array_iterator`: + +Basic Iteration +--------------- + +One common algorithmic requirement is to be able to walk over all +elements in a multidimensional array. The array iterator object makes +this easy to do in a generic way that works for arrays of any +dimension. Naturally, if you know the number of dimensions you will be +using, then you can always write nested for loops to accomplish the +iteration. If, however, you want to write code that works with any +number of dimensions, then you can make use of the array iterator. An +array iterator object is returned when accessing the .flat attribute +of an array. + +.. index:: + single: array iterator + +Basic usage is to call :cfunc:`PyArray_IterNew` ( ``array`` ) where array +is an ndarray object (or one of its sub-classes). The returned object +is an array-iterator object (the same object returned by the .flat +attribute of the ndarray). This object is usually cast to +PyArrayIterObject* so that its members can be accessed. The only +members that are needed are ``iter->size`` which contains the total +size of the array, ``iter->index``, which contains the current 1-d +index into the array, and ``iter->dataptr`` which is a pointer to the +data for the current element of the array. 
Sometimes it is also +useful to access ``iter->ao`` which is a pointer to the underlying +ndarray object. + +After processing data at the current element of the array, the next +element of the array can be obtained using the macro +:cfunc:`PyArray_ITER_NEXT` ( ``iter`` ). The iteration always proceeds in a +C-style contiguous fashion (last index varying the fastest). The +:cfunc:`PyArray_ITER_GOTO` ( ``iter``, ``destination`` ) can be used to +jump to a particular point in the array, where ``destination`` is an +array of npy_intp data-type with space to handle at least the number +of dimensions in the underlying array. Occasionally it is useful to +use :cfunc:`PyArray_ITER_GOTO1D` ( ``iter``, ``index`` ) which will jump +to the 1-d index given by the value of ``index``. The most common +usage, however, is given in the following example. + +.. code-block:: c + + PyObject *obj; /* assumed to be some ndarray object */ + PyArrayIterObject *iter; + ... + iter = (PyArrayIterObject *)PyArray_IterNew(obj); + if (iter == NULL) goto fail; /* Assume fail has clean-up code */ + while (iter->index < iter->size) { + /* do something with the data at it->dataptr */ + PyArray_ITER_NEXT(it); + } + ... + +You can also use :cfunc:`PyArrayIter_Check` ( ``obj`` ) to ensure you have +an iterator object and :cfunc:`PyArray_ITER_RESET` ( ``iter`` ) to reset an +iterator object back to the beginning of the array. + +It should be emphasized at this point that you may not need the array +iterator if your array is already contiguous (using an array iterator +will work but will be slower than the fastest code you could write). +The major purpose of array iterators is to encapsulate iteration over +N-dimensional arrays with arbitrary strides. They are used in many, +many places in the NumPy source code itself. If you already know your +array is contiguous (Fortran or C), then simply adding the element- +size to a running pointer variable will step you through the array +very efficiently. 
In other words, code like this will probably be +faster for you in the contiguous case (assuming doubles). + +.. code-block:: c + + npy_intp size; + double *dptr; /* could make this any variable type */ + size = PyArray_SIZE(obj); + dptr = PyArray_DATA(obj); + while(size--) { + /* do something with the data at dptr */ + dptr++; + } + + +Iterating over all but one axis +------------------------------- + +A common algorithm is to loop over all elements of an array and +perform some function with each element by issuing a function call. As +function calls can be time consuming, one way to speed up this kind of +algorithm is to write the function so it takes a vector of data and +then write the iteration so the function call is performed for an +entire dimension of data at a time. This increases the amount of work +done per function call, thereby reducing the function-call over-head +to a small(er) fraction of the total time. Even if the interior of the +loop is performed without a function call it can be advantageous to +perform the inner loop over the dimension with the highest number of +elements to take advantage of speed enhancements available on micro- +processors that use pipelining to enhance fundmental operations. + +The :cfunc:`PyArray_IterAllButAxis` ( ``array``, ``&dim`` ) constructs an +iterator object that is modified so that it will not iterate over the +dimension indicated by dim. The only restriction on this iterator +object, is that the :cfunc:`PyArray_Iter_GOTO1D` ( ``it``, ``ind`` ) macro +cannot be used (thus flat indexing won't work either if you pass this +object back to Python --- so you shouldn't do this). Note that the +returned object from this routine is still usually cast to +PyArrayIterObject \*. All that's been done is to modify the strides +and dimensions of the returned iterator to simulate iterating over +array[...,0,...] where 0 is placed on the +:math:`\textrm{dim}^{\textrm{th}}` dimension. 
If dim is negative, then +the dimension with the largest axis is found and used. + + +Iterating over multiple arrays +------------------------------ + +Very often, it is desireable to iterate over several arrays at the +same time. The universal functions are an example of this kind of +behavior. If all you want to do is iterate over arrays with the same +shape, then simply creating several iterator objects is the standard +procedure. For example, the following code iterates over two arrays +assumed to be the same shape and size (actually obj1 just has to have +at least as many total elements as does obj2): + +.. code-block:: c + + /* It is already assumed that obj1 and obj2 + are ndarrays of the same shape and size. + */ + iter1 = (PyArrayIterObject *)PyArray_IterNew(obj1); + if (iter1 == NULL) goto fail; + iter2 = (PyArrayIterObject *)PyArray_IterNew(obj2); + if (iter2 == NULL) goto fail; /* assume iter1 is DECREF'd at fail */ + while (iter2->index < iter2->size) { + /* process with iter1->dataptr and iter2->dataptr */ + PyArray_ITER_NEXT(iter1); + PyArray_ITER_NEXT(iter2); + } + + +Broadcasting over multiple arrays +--------------------------------- + +.. index:: + single: broadcasting + +When multiple arrays are involved in an operation, you may want to use the same +broadcasting rules that the math operations ( *i.e.* the ufuncs) use. This can +be done easily using the :ctype:`PyArrayMultiIterObject`. This is the object +returned from the Python command numpy.broadcast and it is almost as easy to +use from C. The function :cfunc:`PyArray_MultiIterNew` ( ``n``, ``...`` ) is +used (with ``n`` input objects in place of ``...`` ). The input objects can be +arrays or anything that can be converted into an array. A pointer to a +PyArrayMultiIterObject is returned. 
Broadcasting has already been accomplished +which adjusts the iterators so that all that needs to be done to advance to the +next element in each array is for PyArray_ITER_NEXT to be called for each of +the inputs. This incrementing is automatically performed by +:cfunc:`PyArray_MultiIter_NEXT` ( ``obj`` ) macro (which can handle a +multiterator ``obj`` as either a :ctype:`PyArrayMultiObject *` or a +:ctype:`PyObject *`). The data from input number ``i`` is available using +:cfunc:`PyArray_MultiIter_DATA` ( ``obj``, ``i`` ) and the total (broadcasted) +size as :cfunc:`PyArray_MultiIter_SIZE` ( ``obj``). An example of using this +feature follows. + +.. code-block:: c + + mobj = PyArray_MultiIterNew(2, obj1, obj2); + size = PyArray_MultiIter_SIZE(obj); + while(size--) { + ptr1 = PyArray_MultiIter_DATA(mobj, 0); + ptr2 = PyArray_MultiIter_DATA(mobj, 1); + /* code using contents of ptr1 and ptr2 */ + PyArray_MultiIter_NEXT(mobj); + } + +The function :cfunc:`PyArray_RemoveLargest` ( ``multi`` ) can be used to +take a multi-iterator object and adjust all the iterators so that +iteration does not take place over the largest dimension (it makes +that dimension of size 1). The code being looped over that makes use +of the pointers will very-likely also need the strides data for each +of the iterators. This information is stored in +multi->iters[i]->strides. + +.. index:: + single: array iterator + +There are several examples of using the multi-iterator in the NumPy +source code as it makes N-dimensional broadcasting-code very simple to +write. Browse the source for more examples. + +.. _`sec:Creating-a-new`: + +Creating a new universal function +================================= + +.. index:: + pair: ufunc; adding new + +The umath module is a computer-generated C-module that creates many +ufuncs. It provides a great many examples of how to create a universal +function. Creating your own ufunc that will make use of the ufunc +machinery is not difficult either. 
Suppose you have a function that +you want to operate element-by-element over its inputs. By creating a +new ufunc you will obtain a function that handles + +- broadcasting + +- N-dimensional looping + +- automatic type-conversions with minimal memory usage + +- optional output arrays + +It is not difficult to create your own ufunc. All that is required is +a 1-d loop for each data-type you want to support. Each 1-d loop must +have a specific signature, and only ufuncs for fixed-size data-types +can be used. The function call used to create a new ufunc to work on +built-in data-types is given below. A different mechanism is used to +register ufuncs for user-defined data-types. + +.. cfunction:: PyObject *PyUFunc_FromFuncAndData( PyUFuncGenericFunction* func, void** data, char* types, int ntypes, int nin, int nout, int identity, char* name, char* doc, int check_return) + + *func* + + A pointer to an array of 1-d functions to use. This array must be at + least ntypes long. Each entry in the array must be a ``PyUFuncGenericFunction`` function. This function has the following signature. An example of a + valid 1d loop function is also given. + + .. cfunction:: void loop1d(char** args, npy_intp* dimensions, npy_intp* steps, void* data) + + *args* + + An array of pointers to the actual data for the input and output + arrays. The input arguments are given first followed by the output + arguments. + + *dimensions* + + A pointer to the size of the dimension over which this function is + looping. + + *steps* + + A pointer to the number of bytes to jump to get to the + next element in this dimension for each of the input and + output arguments. + + *data* + + Arbitrary data (extra arguments, function names, *etc.* ) + that can be stored with the ufunc and will be passed in + when it is called. + + .. 
code-block:: c + + static void + double_add(char **args, npy_intp *dimensions, npy_intp *steps, void *extra) + { + npy_intp i; + npy_intp is1=steps[0], is2=steps[1]; + npy_intp os=steps[2], n=dimensions[0]; + char *i1=args[0], *i2=args[1], *op=args[2]; + for (i=0; i<n; i++) { + *((double *)op) = *((double *)i1) + \ + *((double *)i2); + i1 += is1; i2 += is2; op += os; + } + } + + *data* + + An array of data. There should be ntypes entries (or NULL) --- one for + every loop function defined for this ufunc. This data will be passed + in to the 1-d loop. One common use of this data variable is to pass in + an actual function to call to compute the result when a generic 1-d + loop (e.g. :cfunc:`PyUFunc_d_d`) is being used. + + *types* + + An array of type-number signatures (type ``char`` ). This + array should be of size (nin+nout)*ntypes and contain the + data-types for the corresponding 1-d loop. The inputs should + be first followed by the outputs. For example, suppose I have + a ufunc that supports 1 integer and 1 double 1-d loop + (length-2 func and data arrays) that takes 2 inputs and + returns 1 output that is always a complex double, then the + types array would be + + + The bit-width names can also be used (e.g. :cdata:`NPY_INT32`, + :cdata:`NPY_COMPLEX128` ) if desired. + + *ntypes* + + The number of data-types supported. This is equal to the number of 1-d + loops provided. + + *nin* + + The number of input arguments. + + *nout* + + The number of output arguments. + + *identity* + + Either :cdata:`PyUFunc_One`, :cdata:`PyUFunc_Zero`, :cdata:`PyUFunc_None`. + This specifies what should be returned when an empty array is + passed to the reduce method of the ufunc. + + *name* + + A ``NULL`` -terminated string providing the name of this ufunc + (should be the Python name it will be called). + + *doc* + + A documentation string for this ufunc (will be used in generating the + response to ``{ufunc_name}.__doc__``). 
Do not include the function + signature or the name as this is generated automatically. + + *check_return* + + Not presently used, but this integer value does get set in the + structure-member of similar name. + + .. index:: + pair: ufunc; adding new + + The returned ufunc object is a callable Python object. It should be + placed in a (module) dictionary under the same name as was used in the + name argument to the ufunc-creation routine. The following example is + adapted from the umath module + + .. code-block:: c + + static PyUFuncGenericFunction atan2_functions[]=\ + {PyUFunc_ff_f, PyUFunc_dd_d, + PyUFunc_gg_g, PyUFunc_OO_O_method}; + static void* atan2_data[]=\ + {(void *)atan2f,(void *) atan2, + (void *)atan2l,(void *)"arctan2"}; + static char atan2_signatures[]=\ + {NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, + NPY_DOUBLE, NPY_DOUBLE, + NPY_DOUBLE, NPY_LONGDOUBLE, + NPY_LONGDOUBLE, NPY_LONGDOUBLE, + NPY_OBJECT, NPY_OBJECT, + NPY_OBJECT}; + ... + /* in the module initialization code */ + PyObject *f, *dict, *module; + ... + dict = PyModule_GetDict(module); + ... + f = PyUFunc_FromFuncAndData(atan2_functions, + atan2_data, atan2_signatures, 4, 2, 1, + PyUFunc_None, "arctan2", + "a safe and correct arctan(x1/x2)", 0); + PyDict_SetItemString(dict, "arctan2", f); + Py_DECREF(f); + ... + + +User-defined data-types +======================= + +NumPy comes with 21 builtin data-types. While this covers a large +majority of possible use cases, it is conceivable that a user may have +a need for an additional data-type. There is some support for adding +an additional data-type into the NumPy system. This additional data- +type will behave much like a regular data-type except ufuncs must have +1-d loops registered to handle it separately. Also checking for +whether or not other data-types can be cast "safely" to and from this +new type or not will always return "can cast" unless you also register +which types your new data-type can be cast to and from. 
Adding +data-types is one of the less well-tested areas for NumPy 1.0, so +there may be bugs remaining in the approach. Only add a new data-type +if you can't do what you want to do using the OBJECT or VOID +data-types that are already available. As an example of what I +consider a useful application of the ability to add data-types is the +possibility of adding a data-type of arbitrary precision floats to +NumPy. + +.. index:: + pair: dtype; adding new + + +Adding the new data-type +------------------------ + +To begin to make use of the new data-type, you need to first define a +new Python type to hold the scalars of your new data-type. It should +be acceptable to inherit from one of the array scalars if your new +type has a binary compatible layout. This will allow your new data +type to have the methods and attributes of array scalars. New data- +types must have a fixed memory size (if you want to define a data-type +that needs a flexible representation, like a variable-precision +number, then use a pointer to the object as the data-type). The memory +layout of the object structure for the new Python type must be +PyObject_HEAD followed by the fixed-size memory needed for the data- +type. For example, a suitable structure for the new Python type is: + +.. code-block:: c + + typedef struct { + PyObject_HEAD; + some_data_type obval; + /* the name can be whatever you want */ + } PySomeDataTypeObject; + +After you have defined a new Python type object, you must then define +a new :ctype:`PyArray_Descr` structure whose typeobject member will contain a +pointer to the data-type you've just defined. In addition, the +required functions in the ".f" member must be defined: nonzero, +copyswap, copyswapn, setitem, getitem, and cast. The more functions in +the ".f" member you define, however, the more useful the new data-type +will be. It is very important to intialize unused functions to NULL. +This can be achieved using :cfunc:`PyArray_InitArrFuncs` (f). 
+ +Once a new :ctype:`PyArray_Descr` structure is created and filled with the +needed information and useful functions you call +:cfunc:`PyArray_RegisterDataType` (new_descr). The return value from this +call is an integer providing you with a unique type_number that +specifies your data-type. This type number should be stored and made +available by your module so that other modules can use it to recognize +your data-type (the other mechanism for finding a user-defined +data-type number is to search based on the name of the type-object +associated with the data-type using :cfunc:`PyArray_TypeNumFromName` ). + + +Registering a casting function +------------------------------ + +You may want to allow builtin (and other user-defined) data-types to +be cast automatically to your data-type. In order to make this +possible, you must register a casting function with the data-type you +want to be able to cast from. This requires writing low-level casting +functions for each conversion you want to support and then registering +these functions with the data-type descriptor. A low-level casting +function has the signature. + +.. cfunction:: void castfunc( void* from, void* to, npy_intp n, void* fromarr, void* toarr) + + Cast ``n`` elements ``from`` one type ``to`` another. The data to + cast from is in a contiguous, correctly-swapped and aligned chunk + of memory pointed to by from. The buffer to cast to is also + contiguous, correctly-swapped and aligned. The fromarr and toarr + arguments should only be used for flexible-element-sized arrays + (string, unicode, void). + +An example castfunc is: + +.. code-block:: c + + static void + double_to_float(double *from, float* to, npy_intp n, + void* ig1, void* ig2) + { + while (n--) { + (*to++) = (float) *(from++); + } + } + +This could then be registered to convert doubles to floats using the +code: + +.. 
code-block:: c + + doub = PyArray_DescrFromType(NPY_DOUBLE); + PyArray_RegisterCastFunc(doub, NPY_FLOAT, + (PyArray_VectorUnaryFunc *)double_to_float); + Py_DECREF(doub); + + +Registering coercion rules +-------------------------- + +By default, all user-defined data-types are not presumed to be safely +castable to any builtin data-types. In addition builtin data-types are +not presumed to be safely castable to user-defined data-types. This +situation limits the ability of user-defined data-types to participate +in the coercion system used by ufuncs and other times when automatic +coercion takes place in NumPy. This can be changed by registering +data-types as safely castable from a particlar data-type object. The +function :cfunc:`PyArray_RegisterCanCast` (from_descr, totype_number, +scalarkind) should be used to specify that the data-type object +from_descr can be cast to the data-type with type number +totype_number. If you are not trying to alter scalar coercion rules, +then use :cdata:`PyArray_NOSCALAR` for the scalarkind argument. + +If you want to allow your new data-type to also be able to share in +the scalar coercion rules, then you need to specify the scalarkind +function in the data-type object's ".f" member to return the kind of +scalar the new data-type should be seen as (the value of the scalar is +available to that function). Then, you can register data-types that +can be cast to separately for each scalar kind that may be returned +from your user-defined data-type. If you don't register scalar +coercion handling, then all of your user-defined data-types will be +seen as :cdata:`PyArray_NOSCALAR`. + + +Registering a ufunc loop +------------------------ + +You may also want to register low-level ufunc loops for your data-type +so that an ndarray of your data-type can have math applied to it +seamlessly. Registering a new loop with exactly the same arg_types +signature, silently replaces any previously registered loops for that +data-type. 
+ +Before you can register a 1-d loop for a ufunc, the ufunc must be +previously created. Then you call :cfunc:`PyUFunc_RegisterLoopForType` +(...) with the information needed for the loop. The return value of +this function is ``0`` if the process was successful and ``-1`` with +an error condition set if it was not successful. + +.. cfunction:: int PyUFunc_RegisterLoopForType( PyUFuncObject* ufunc, int usertype, PyUFuncGenericFunction function, int* arg_types, void* data) + + *ufunc* + + The ufunc to attach this loop to. + + *usertype* + + The user-defined type this loop should be indexed under. This number + must be a user-defined type or an error occurs. + + *function* + + The ufunc inner 1-d loop. This function must have the signature as + explained in Section `3 <#sec-creating-a-new>`__ . + + *arg_types* + + (optional) If given, this should contain an array of integers of at + least size ufunc.nargs containing the data-types expected by the loop + function. The data will be copied into a NumPy-managed structure so + the memory for this argument should be deleted after calling this + function. If this is NULL, then it will be assumed that all data-types + are of type usertype. + + *data* + + (optional) Specify any optional data needed by the function which will + be passed when the function is called. + + .. index:: + pair: dtype; adding new + + +Subtyping the ndarray in C +========================== + +One of the lesser-used features that has been lurking in Python since +2.2 is the ability to sub-class types in C. This facility is one of +the important reasons for basing NumPy off of the Numeric code-base +which was already in C. A sub-type in C allows much more flexibility +with regards to memory management. Sub-typing in C is not difficult +even if you have only a rudimentary understanding of how to create new +types for Python. While it is easiest to sub-type from a single parent +type, sub-typing from multiple parent types is also possible. 
Multiple +inheritence in C is generally less useful than it is in Python because +a restriction on Python sub-types is that they have a binary +compatible memory layout. Perhaps for this reason, it is somewhat +easier to sub-type from a single parent type. + +.. index:: + pair: ndarray; subtyping + +All C-structures corresponding to Python objects must begin with +:cmacro:`PyObject_HEAD` (or :cmacro:`PyObject_VAR_HEAD`). In the same +way, any sub-type must have a C-structure that begins with exactly the +same memory layout as the parent type (or all of the parent types in +the case of multiple-inheritance). The reason for this is that Python +may attempt to access a member of the sub-type structure as if it had +the parent structure ( *i.e.* it will cast a given pointer to a +pointer to the parent structure and then dereference one of it's +members). If the memory layouts are not compatible, then this attempt +will cause unpredictable behavior (eventually leading to a memory +violation and program crash). + +One of the elements in :cmacro:`PyObject_HEAD` is a pointer to a +type-object structure. A new Python type is created by creating a new +type-object structure and populating it with functions and pointers to +describe the desired behavior of the type. Typically, a new +C-structure is also created to contain the instance-specific +information needed for each object of the type as well. For example, +:cdata:`&PyArray_Type` is a pointer to the type-object table for the ndarray +while a :ctype:`PyArrayObject *` variable is a pointer to a particular instance +of an ndarray (one of the members of the ndarray structure is, in +turn, a pointer to the type- object table :cdata:`&PyArray_Type`). Finally +:cfunc:`PyType_Ready` (<pointer_to_type_object>) must be called for +every new Python type. 
+ + +Creating sub-types +------------------ + +To create a sub-type, a similar procedure must be followed except +only behaviors that are different require new entries in the type- +object structure. All other entries can be NULL and will be filled in +by :cfunc:`PyType_Ready` with appropriate functions from the parent +type(s). In particular, to create a sub-type in C follow these steps: + +1. If needed create a new C-structure to handle each instance of your + type. A typical C-structure would be: + + .. code-block:: c + + typedef struct _new_struct { + PyArrayObject base; + /* new things here */ + } NewArrayObject; + + Notice that the full PyArrayObject is used as the first entry in order + to ensure that the binary layout of instances of the new type is + identical to the PyArrayObject. + +2. Fill in a new Python type-object structure with pointers to new + functions that will over-ride the default behavior while leaving any + function that should remain the same unfilled (or NULL). The tp_name + element should be different. + +3. Fill in the tp_base member of the new type-object structure with a + pointer to the (main) parent type object. For multiple-inheritance, + also fill in the tp_bases member with a tuple containing all of the + parent objects in the order they should be used to define inheritance. + Remember, all parent-types must have the same C-structure for multiple + inheritance to work properly. + +4. Call :cfunc:`PyType_Ready` (<pointer_to_new_type>). If this function + returns a negative number, a failure occurred and the type is not + initialized. Otherwise, the type is ready to be used. It is + generally important to place a reference to the new type into the + module dictionary so it can be accessed from Python. + +More information on creating sub-types in C can be learned by reading +PEP 253 (available at http://www.python.org/dev/peps/pep-0253). 
+ + +Specific features of ndarray sub-typing +--------------------------------------- + +Some special methods and attributes are used by arrays in order to +facilitate the interoperation of sub-types with the base ndarray type. + +.. note:: XXX: some of the documentation below needs to be moved to the + reference guide. + + +The __array_finalize\__ method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. attribute:: ndarray.__array_finalize__ + + Several array-creation functions of the ndarray allow + specification of a particular sub-type to be created. This allows + sub-types to be handled seamlessly in many routines. When a + sub-type is created in such a fashion, however, neither the + __new_\_ method nor the __init\__ method gets called. Instead, the + sub-type is allocated and the appropriate instance-structure + members are filled in. Finally, the :obj:`__array_finalize__` + attribute is looked-up in the object dictionary. If it is present + and not None, then it can be either a CObject containing a pointer + to a :cfunc:`PyArray_FinalizeFunc` or it can be a method taking a + single argument (which could be None). + + If the :obj:`__array_finalize__` attribute is a CObject, then the pointer + must be a pointer to a function with the signature: + + .. code-block:: c + + (int) (PyArrayObject *, PyObject *) + + The first argument is the newly created sub-type. The second argument + (if not NULL) is the "parent" array (if the array was created using + slicing or some other operation where a clearly-distinguishable parent + is present). This routine can do anything it wants to. It should + return a -1 on error and 0 otherwise. + + If the :obj:`__array_finalize__` attribute is not None nor a CObject, + then it must be a Python method that takes the parent array as an + argument (which could be None if there is no parent), and returns + nothing. Errors in this method will be caught and handled. + + +The __array_priority\__ attribute +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
attribute:: ndarray.__array_priority__ + + This attribute allows simple but flexible determination of which sub- + type should be considered "primary" when an operation involving two or + more sub-types arises. In operations where different sub-types are + being used, the sub-type with the largest :obj:`__array_priority__` + attribute will determine the sub-type of the output(s). If two sub- + types have the same :obj:`__array_priority__` then the sub-type of the + first argument determines the output. The default + :obj:`__array_priority__` attribute returns a value of 0.0 for the base + ndarray type and 1.0 for a sub-type. This attribute can also be + defined by objects that are not sub-types of the ndarray and can be + used to determine which :obj:`__array_wrap__` method should be called for + the return output. + +The __array_wrap\__ method +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. attribute:: ndarray.__array_wrap__ + + Any class or type can define this method which should take an ndarray + argument and return an instance of the type. It can be seen as the + opposite of the :obj:`__array__` method. This method is used by the + ufuncs (and other NumPy functions) to allow other objects to pass + through. For Python >2.4, it can also be used to write a decorator + that converts a function that works only with ndarrays to one that + works with any type with :obj:`__array__` and :obj:`__array_wrap__` methods. + +.. index:: + pair: ndarray; subtyping diff --git a/doc/source/user/c-info.how-to-extend.rst b/doc/source/user/c-info.how-to-extend.rst new file mode 100644 index 000000000..56f3c99f1 --- /dev/null +++ b/doc/source/user/c-info.how-to-extend.rst @@ -0,0 +1,641 @@ +******************* +How to extend NumPy +******************* + +| That which is static and repetitive is boring. That which is dynamic +| and random is confusing. In between lies art. +| --- *John A. Locke* + +| Science is a differential equation. Religion is a boundary condition. 
+| --- *Alan Turing* + + +.. _`sec:Writing-an-extension`: + +Writing an extension module +=========================== + +While the ndarray object is designed to allow rapid computation in +Python, it is also designed to be general-purpose and satisfy a wide- +variety of computational needs. As a result, if absolute speed is +essential, there is no replacement for a well-crafted, compiled loop +specific to your application and hardware. This is one of the reasons +that numpy includes f2py so that an easy-to-use mechanisms for linking +(simple) C/C++ and (arbitrary) Fortran code directly into Python are +available. You are encouraged to use and improve this mechanism. The +purpose of this section is not to document this tool but to document +the more basic steps to writing an extension module that this tool +depends on. + +.. index:: + single: extension module + +When an extension module is written, compiled, and installed to +somewhere in the Python path (sys.path), the code can then be imported +into Python as if it were a standard python file. It will contain +objects and methods that have been defined and compiled in C code. The +basic steps for doing this in Python are well-documented and you can +find more information in the documentation for Python itself available +online at `www.python.org <http://www.python.org>`_ . + +In addition to the Python C-API, there is a full and rich C-API for +NumPy allowing sophisticated manipulations on a C-level. However, for +most applications, only a few API calls will typically be used. If all +you need to do is extract a pointer to memory along with some shape +information to pass to another calculation routine, then you will use +very different calls, then if you are trying to create a new array- +like type or add a new data type for ndarrays. This chapter documents +the API calls and macros that are most commonly used. 
+ + +Required subroutine +=================== + +There is exactly one function that must be defined in your C-code in +order for Python to use it as an extension module. The function must +be called init{name} where {name} is the name of the module from +Python. This function must be declared so that it is visible to code +outside of the routine. Besides adding the methods and constants you +desire, this subroutine must also contain calls to import_array() +and/or import_ufunc() depending on which C-API is needed. Forgetting +to place these commands will show itself as an ugly segmentation fault +(crash) as soon as any C-API subroutine is actually called. It is +actually possible to have multiple init{name} functions in a single +file in which case multiple modules will be defined by that file. +However, there are some tricks to get that to work correctly and it is +not covered here. + +A minimal ``init{name}`` method looks like: + +.. code-block:: c + + PyMODINIT_FUNC + init{name}(void) + { + (void)Py_InitModule({name}, mymethods); + import_array(); + } + +The mymethods must be an array (usually statically declared) of +PyMethodDef structures which contain method names, actual C-functions, +a variable indicating whether the method uses keyword arguments or +not, and docstrings. These are explained in the next section. If you +want to add constants to the module, then you store the returned value +from Py_InitModule which is a module object. The most general way to +add itmes to the module is to get the module dictionary using +PyModule_GetDict(module). With the module dictionary, you can add +whatever you like to the module manually. An easier way to add objects +to the module is to use one of three additional Python C-API calls +that do not require a separate extraction of the module dictionary. +These are documented in the Python documentation, but repeated here +for convenience: + +.. 
cfunction:: int PyModule_AddObject(PyObject* module, char* name, PyObject* value) + +.. cfunction:: int PyModule_AddIntConstant(PyObject* module, char* name, long value) + +.. cfunction:: int PyModule_AddStringConstant(PyObject* module, char* name, char* value) + + All three of these functions require the *module* object (the + return value of Py_InitModule). The *name* is a string that + labels the value in the module. Depending on which function is + called, the *value* argument is either a general object + (:cfunc:`PyModule_AddObject` steals a reference to it), an integer + constant, or a string constant. + + +Defining functions +================== + +The second argument passed in to the Py_InitModule function is a +structure that makes it easy to to define functions in the module. In +the example given above, the mymethods structure would have been +defined earlier in the file (usually right before the init{name} +subroutine) to: + +.. code-block:: c + + static PyMethodDef mymethods[] = { + { nokeywordfunc,nokeyword_cfunc, + METH_VARARGS, + Doc string}, + { keywordfunc, keyword_cfunc, + METH_VARARGS|METH_KEYWORDS, + Doc string}, + {NULL, NULL, 0, NULL} /* Sentinel */ + } + +Each entry in the mymethods array is a :ctype:`PyMethodDef` structure +containing 1) the Python name, 2) the C-function that implements the +function, 3) flags indicating whether or not keywords are accepted for +this function, and 4) The docstring for the function. Any number of +functions may be defined for a single module by adding more entries to +this table. The last entry must be all NULL as shown to act as a +sentinel. Python looks for this entry to know that all of the +functions for the module have been defined. + +The last thing that must be done to finish the extension module is to +actually write the code that performs the desired functions. There are +two kinds of functions: those that don't accept keyword arguments, and +those that do. 
+ + +Functions without keyword arguments +----------------------------------- + +Functions that don't accept keyword arguments should be written as: + +.. code-block:: c + + static PyObject* + nokeyword_cfunc (PyObject *dummy, PyObject *args) + { + /* convert Python arguments */ + /* do function */ + /* return something */ + } + +The dummy argument is not used in this context and can be safely +ignored. The *args* argument contains all of the arguments passed in +to the function as a tuple. You can do anything you want at this +point, but usually the easiest way to manage the input arguments is to +call :cfunc:`PyArg_ParseTuple` (args, format_string, +addresses_to_C_variables...) or :cfunc:`PyArg_UnpackTuple` (tuple, "name" , +min, max, ...). A good description of how to use the first function is +contained in the Python C-API reference manual under section 5.5 +(Parsing arguments and building values). You should pay particular +attention to the "O&" format which uses converter functions to go +between the Python object and the C object. All of the other format +functions can be (mostly) thought of as special cases of this general +rule. There are several converter functions defined in the NumPy C-API +that may be of use. In particular, the :cfunc:`PyArray_DescrConverter` +function is very useful to support arbitrary data-type specification. +This function transforms any valid data-type Python object into a +:ctype:`PyArray_Descr *` object. Remember to pass in the address of the +C-variables that should be filled in. + +There are lots of examples of how to use :cfunc:`PyArg_ParseTuple` +throughout the NumPy source code. The standard usage is like this: + +.. code-block:: c + + PyObject *input; + PyArray_Descr *dtype; + if (!PyArg_ParseTuple(args, "OO&", &input, + PyArray_DescrConverter, + &dtype)) return NULL; + +It is important to keep in mind that you get a *borrowed* reference to +the object when using the "O" format string. 
However, the converter +functions usually require some form of memory handling. In this +example, if the conversion is successful, *dtype* will hold a new +reference to a :ctype:`PyArray_Descr *` object, while *input* will hold a +borrowed reference. Therefore, if this conversion were mixed with +another conversion (say to an integer) and the data-type conversion +was successful but the integer conversion failed, then you would need +to release the reference count to the data-type object before +returning. A typical way to do this is to set *dtype* to ``NULL`` +before calling :cfunc:`PyArg_ParseTuple` and then use :cfunc:`Py_XDECREF` +on *dtype* before returning. + +After the input arguments are processed, the code that actually does +the work is written (likely calling other functions as needed). The +final step of the C-function is to return something. If an error is +encountered then ``NULL`` should be returned (making sure an error has +actually been set). If nothing should be returned then increment +:cdata:`Py_None` and return it. If a single object should be returned then +it is returned (ensuring that you own a reference to it first). If +multiple objects should be returned then you need to return a tuple. +The :cfunc:`Py_BuildValue` (format_string, c_variables...) function makes +it easy to build tuples of Python objects from C variables. Pay +special attention to the difference between 'N' and 'O' in the format +string or you can easily create memory leaks. The 'O' format string +increments the reference count of the :ctype:`PyObject *` C-variable it +corresponds to, while the 'N' format string steals a reference to the +corresponding :ctype:`PyObject *` C-variable. You should use 'N' if you ave +already created a reference for the object and just want to give that +reference to the tuple. You should use 'O' if you only have a borrowed +reference to an object and need to create one to provide for the +tuple. 
+ + +Functions with keyword arguments +-------------------------------- + +These functions are very similar to functions without keyword +arguments. The only difference is that the function signature is: + +.. code-block:: c + + static PyObject* + keyword_cfunc (PyObject *dummy, PyObject *args, PyObject *kwds) + { + ... + } + +The kwds argument holds a Python dictionary whose keys are the names +of the keyword arguments and whose values are the corresponding +keyword-argument values. This dictionary can be processed however you +see fit. The easiest way to handle it, however, is to replace the +:cfunc:`PyArg_ParseTuple` (args, format_string, addresses...) function with +a call to :cfunc:`PyArg_ParseTupleAndKeywords` (args, kwds, format_string, +char \*kwlist[], addresses...). The kwlist parameter to this function +is a ``NULL`` -terminated array of strings providing the expected +keyword arguments. There should be one string for each entry in the +format_string. Using this function will raise a TypeError if invalid +keyword arguments are passed in. + +For more help on this function please see section 1.8 (Keyword +Parameters for Extension Functions) of the Extending and Embedding +tutorial in the Python documentation. + + +Reference counting +------------------ + +The biggest difficulty when writing extension modules is reference +counting. It is an important reason for the popularity of f2py, weave, +pyrex, ctypes, etc.... If you mis-handle reference counts you can get +problems from memory-leaks to segmentation faults. The only strategy I +know of to handle reference counts correctly is blood, sweat, and +tears. First, you force it into your head that every Python variable +has a reference count. Then, you understand exactly what each function +does to the reference count of your objects, so that you can properly +use DECREF and INCREF when you need them. 
Reference counting can +really test the amount of patience and diligence you have towards your +programming craft. Despite the grim depiction, most cases of reference +counting are quite straightforward with the most common difficulty +being not using DECREF on objects before exiting early from a routine +due to some error. In second place, is the common error of not owning +the reference on an object that is passed to a function or macro that +is going to steal the reference ( *e.g.* :cfunc:`PyTuple_SET_ITEM`, and +most functions that take :ctype:`PyArray_Descr` objects). + +.. index:: + single: reference counting + +Typically you get a new reference to a variable when it is created or +is the return value of some function (there are some prominent +exceptions, however --- such as getting an item out of a tuple or a +dictionary). When you own the reference, you are responsible to make +sure that :cfunc:`Py_DECREF` (var) is called when the variable is no +longer necessary (and no other function has "stolen" its +reference). Also, if you are passing a Python object to a function +that will "steal" the reference, then you need to make sure you own it +(or use :cfunc:`Py_INCREF` to get your own reference). You will also +encounter the notion of borrowing a reference. A function that borrows +a reference does not alter the reference count of the object and does +not expect to "hold on "to the reference. It's just going to use the +object temporarily. When you use :cfunc:`PyArg_ParseTuple` or +:cfunc:`PyArg_UnpackTuple` you receive a borrowed reference to the +objects in the tuple and should not alter their reference count inside +your function. With practice, you can learn to get reference counting +right, but it can be frustrating at first. + +One common source of reference-count errors is the :cfunc:`Py_BuildValue` +function. Pay careful attention to the difference between the 'N' +format character and the 'O' format character. 
If you create a new +object in your subroutine (such as an output array), and you are +passing it back in a tuple of return values, then you should most- +likely use the 'N' format character in :cfunc:`Py_BuildValue`. The 'O' +character will increase the reference count by one. This will leave +the caller with two reference counts for a brand-new array. When the +variable is deleted and the reference count decremented by one, there +will still be that extra reference count, and the array will never be +deallocated. You will have a reference-counting induced memory leak. +Using the 'N' character will avoid this situation as it will return to +the caller an object (inside the tuple) with a single reference count. + +.. index:: + single: reference counting + + + + +Dealing with array objects +========================== + +Most extension modules for NumPy will need to access the memory for an +ndarray object (or one of it's sub-classes). The easiest way to do +this doesn't require you to know much about the internals of NumPy. +The method is to + +1. Ensure you are dealing with a well-behaved array (aligned, in machine + byte-order and single-segment) of the correct type and number of + dimensions. + + 1. By converting it from some Python object using + :cfunc:`PyArray_FromAny` or a macro built on it. + + 2. By constructing a new ndarray of your desired shape and type + using :cfunc:`PyArray_NewFromDescr` or a simpler macro or function + based on it. + + +2. Get the shape of the array and a pointer to its actual data. + +3. Pass the data and shape information on to a subroutine or other + section of code that actually performs the computation. + +4. If you are writing the algorithm, then I recommend that you use the + stride information contained in the array to access the elements of + the array (the :cfunc:`PyArray_GETPTR` macros make this painless). 
Then, + you can relax your requirements so as not to force a single-segment + array and the data-copying that might result. + +Each of these sub-topics is covered in the following sub-sections. + + +Converting an arbitrary sequence object +--------------------------------------- + +The main routine for obtaining an array from any Python object that +can be converted to an array is :cfunc:`PyArray_FromAny`. This +function is very flexible with many input arguments. Several macros +make it easier to use the basic function. :cfunc:`PyArray_FROM_OTF` is +arguably the most useful of these macros for the most common uses. It +allows you to convert an arbitrary Python object to an array of a +specific builtin data-type ( *e.g.* float), while specifying a +particular set of requirements ( *e.g.* contiguous, aligned, and +writeable). The syntax is + +.. cfunction:: PyObject *PyArray_FROM_OTF(PyObject* obj, int typenum, int requirements) + + Return an ndarray from any Python object, *obj*, that can be + converted to an array. The number of dimensions in the returned + array is determined by the object. The desired data-type of the + returned array is provided in *typenum* which should be one of the + enumerated types. The *requirements* for the returned array can be + any combination of standard array flags. Each of these arguments + is explained in more detail below. You receive a new reference to + the array on success. On failure, ``NULL`` is returned and an + exception is set. + + *obj* + + The object can be any Python object convertable to an ndarray. + If the object is already (a subclass of) the ndarray that + satisfies the requirements then a new reference is returned. + Otherwise, a new array is constructed. The contents of *obj* + are copied to the new array unless the array interface is used + so that data does not have to be copied. 
Objects that can be + converted to an array include: 1) any nested sequence object, + 2) any object exposing the array interface, 3) any object with + an :obj:`__array__` method (which should return an ndarray), + and 4) any scalar object (becomes a zero-dimensional + array). Sub-classes of the ndarray that otherwise fit the + requirements will be passed through. If you want to ensure + a base-class ndarray, then use :cdata:`NPY_ENSUREARRAY` in the + requirements flag. A copy is made only if necessary. If you + want to guarantee a copy, then pass in :cdata:`NPY_ENSURECOPY` + to the requirements flag. + + *typenum* + + One of the enumerated types or :cdata:`NPY_NOTYPE` if the data-type + should be determined from the object itself. The C-based names + can be used: + + :cdata:`NPY_BOOL`, :cdata:`NPY_BYTE`, :cdata:`NPY_UBYTE`, + :cdata:`NPY_SHORT`, :cdata:`NPY_USHORT`, :cdata:`NPY_INT`, + :cdata:`NPY_UINT`, :cdata:`NPY_LONG`, :cdata:`NPY_ULONG`, + :cdata:`NPY_LONGLONG`, :cdata:`NPY_ULONGLONG`, :cdata:`NPY_DOUBLE`, + :cdata:`NPY_LONGDOUBLE`, :cdata:`NPY_CFLOAT`, :cdata:`NPY_CDOUBLE`, + :cdata:`NPY_CLONGDOUBLE`, :cdata:`NPY_OBJECT`. + + Alternatively, the bit-width names can be used as supported on the + platform. For example: + + :cdata:`NPY_INT8`, :cdata:`NPY_INT16`, :cdata:`NPY_INT32`, + :cdata:`NPY_INT64`, :cdata:`NPY_UINT8`, + :cdata:`NPY_UINT16`, :cdata:`NPY_UINT32`, + :cdata:`NPY_UINT64`, :cdata:`NPY_FLOAT32`, + :cdata:`NPY_FLOAT64`, :cdata:`NPY_COMPLEX64`, + :cdata:`NPY_COMPLEX128`. + + The object will be converted to the desired type only if it + can be done without losing precision. Otherwise ``NULL`` will + be returned and an error raised. Use :cdata:`NPY_FORCECAST` in the + requirements flag to override this behavior. + + *requirements* + + The memory model for an ndarray admits arbitrary strides in + each dimension to advance to the next element of the array. 
+ Often, however, you need to interface with code that expects a + C-contiguous or a Fortran-contiguous memory layout. In + addition, an ndarray can be misaligned (the address of an + element is not at an integral multiple of the size of the + element) which can cause your program to crash (or at least + work more slowly) if you try and dereference a pointer into + the array data. Both of these problems can be solved by + converting the Python object into an array that is more + "well-behaved" for your specific usage. + + The requirements flag allows specification of what kind of array is + acceptable. If the object passed in does not satisfy this requirements + then a copy is made so that thre returned object will satisfy the + requirements. these ndarray can use a very generic pointer to memory. + This flag allows specification of the desired properties of the + returned array object. All of the flags are explained in the detailed + API chapter. The flags most commonly needed are :cdata:`NPY_IN_ARRAY`, + :cdata:`NPY_OUT_ARRAY`, and :cdata:`NPY_INOUT_ARRAY`: + + .. cvar:: NPY_IN_ARRAY + + Equivalent to :cdata:`NPY_CONTIGUOUS` \| + :cdata:`NPY_ALIGNED`. This combination of flags is useful + for arrays that must be in C-contiguous order and aligned. + These kinds of arrays are usually input arrays for some + algorithm. + + .. cvar:: NPY_OUT_ARRAY + + Equivalent to :cdata:`NPY_CONTIGUOUS` \| + :cdata:`NPY_ALIGNED` \| :cdata:`NPY_WRITEABLE`. This + combination of flags is useful to specify an array that is + in C-contiguous order, is aligned, and can be written to + as well. Such an array is usually returned as output + (although normally such output arrays are created from + scratch). + + .. cvar:: NPY_INOUT_ARRAY + + Equivalent to :cdata:`NPY_CONTIGUOUS` \| + :cdata:`NPY_ALIGNED` \| :cdata:`NPY_WRITEABLE` \| + :cdata:`NPY_UPDATEIFCOPY`. This combination of flags is + useful to specify an array that will be used for both + input and output. 
If a copy is needed, then when the + temporary is deleted (by your use of :cfunc:`Py_DECREF` at + the end of the interface routine), the temporary array + will be copied back into the original array passed in. Use + of the :cdata:`UPDATEIFCOPY` flag requires that the input + object is already an array (because other objects cannot + be automatically updated in this fashion). If an error + occurs use :cfunc:`PyArray_DECREF_ERR` (obj) on an array + with the :cdata:`NPY_UPDATEIFCOPY` flag set. This will + delete the array without causing the contents to be copied + back into the original array. + + + Other useful flags that can be OR'd as additional requirements are: + + .. cvar:: NPY_FORCECAST + + Cast to the desired type, even if it can't be done without losing + information. + + .. cvar:: NPY_ENSURECOPY + + Make sure the resulting array is a copy of the original. + + .. cvar:: NPY_ENSUREARRAY + + Make sure the resulting object is an actual ndarray and not a sub- + class. + +.. note:: + + Whether or not an array is byte-swapped is determined by the + data-type of the array. Native byte-order arrays are always + requested by :cfunc:`PyArray_FROM_OTF` and so there is no need for + a :cdata:`NPY_NOTSWAPPED` flag in the requirements argument. There + is also no way to get a byte-swapped array from this routine. + + +Creating a brand-new ndarray +---------------------------- + +Quite often new arrays must be created from within extension-module +code. Perhaps an output array is needed and you don't want the caller +to have to supply it. Perhaps only a temporary array is needed to hold +an intermediate calculation. Whatever the need there are simple ways +to get an ndarray object of whatever data-type is needed. The most +general function for doing this is :cfunc:`PyArray_NewFromDescr`. All array +creation functions go through this heavily re-used code. Because of +its flexibility, it can be somewhat confusing to use. 
As a result, +simpler forms exist that are easier to use. + +.. cfunction:: PyObject *PyArray_SimpleNew(int nd, npy_intp* dims, int typenum) + + This function allocates new memory and places it in an ndarray + with *nd* dimensions whose shape is determined by the array of + at least *nd* items pointed to by *dims*. The memory for the + array is uninitialized (unless typenum is :cdata:`PyArray_OBJECT` in + which case each element in the array is set to NULL). The + *typenum* argument allows specification of any of the builtin + data-types such as :cdata:`PyArray_FLOAT` or :cdata:`PyArray_LONG`. The + memory for the array can be set to zero if desired using + :cfunc:`PyArray_FILLWBYTE` (return_object, 0). + +.. cfunction:: PyObject *PyArray_SimpleNewFromData( int nd, npy_intp* dims, int typenum, void* data) + + Sometimes, you want to wrap memory allocated elsewhere into an + ndarray object for downstream use. This routine makes it + straightforward to do that. The first three arguments are the same + as in :cfunc:`PyArray_SimpleNew`, the final argument is a pointer to a + block of contiguous memory that the ndarray should use as it's + data-buffer which will be interpreted in C-style contiguous + fashion. A new reference to an ndarray is returned, but the + ndarray will not own its data. When this ndarray is deallocated, + the pointer will not be freed. + + You should ensure that the provided memory is not freed while the + returned array is in existence. The easiest way to handle this is + if data comes from another reference-counted Python object. The + reference count on this object should be increased after the + pointer is passed in, and the base member of the returned ndarray + should point to the Python object that owns the data. Then, when + the ndarray is deallocated, the base-member will be DECREF'd + appropriately. If you want the memory to be freed as soon as the + ndarray is deallocated then simply set the OWNDATA flag on the + returned ndarray. 
+ + +Getting at ndarray memory and accessing elements of the ndarray +--------------------------------------------------------------- + +If obj is an ndarray (:ctype:`PyArrayObject *`), then the data-area of the +ndarray is pointed to by the void* pointer :cfunc:`PyArray_DATA` (obj) or +the char* pointer :cfunc:`PyArray_BYTES` (obj). Remember that (in general) +this data-area may not be aligned according to the data-type, it may +represent byte-swapped data, and/or it may not be writeable. If the +data area is aligned and in native byte-order, then how to get at a +specific element of the array is determined only by the array of +npy_intp variables, :cfunc:`PyArray_STRIDES` (obj). In particular, this +c-array of integers shows how many **bytes** must be added to the +current element pointer to get to the next element in each dimension. +For arrays less than 4-dimensions there are :cfunc:`PyArray_GETPTR{k}` +(obj, ...) macros where {k} is the integer 1, 2, 3, or 4 that make +using the array strides easier. The arguments .... represent {k} non- +negative integer indices into the array. For example, suppose ``E`` is +a 3-dimensional ndarray. A (void*) pointer to the element ``E[i,j,k]`` +is obtained as :cfunc:`PyArray_GETPTR3` (E, i, j, k). + +As explained previously, C-style contiguous arrays and Fortran-style +contiguous arrays have particular striding patterns. Two array flags +(:cdata:`NPY_C_CONTIGUOUS` and :cdata`NPY_F_CONTIGUOUS`) indicate +whether or not the striding pattern of a particular array matches the +C-style contiguous or Fortran-style contiguous or neither. Whether or +not the striding pattern matches a standard C or Fortran one can be +tested Using :cfunc:`PyArray_ISCONTIGUOUS` (obj) and +:cfunc:`PyArray_ISFORTRAN` (obj) respectively. Most third-party +libraries expect contiguous arrays. But, often it is not difficult to +support general-purpose striding. 
I encourage you to use the striding +information in your own code whenever possible, and reserve +single-segment requirements for wrapping third-party code. Using the +striding information provided with the ndarray rather than requiring a +contiguous striding reduces copying that otherwise must be made. + + +Example +======= + +.. index:: + single: extension module + +The following example shows how you might write a wrapper that accepts +two input arguments (that will be converted to an array) and an output +argument (that must be an array). The function returns None and +updates the output array. + +.. code-block:: c + + static PyObject * + example_wrapper(PyObject *dummy, PyObject *args) + { + PyObject *arg1=NULL, *arg2=NULL, *out=NULL; + PyObject *arr1=NULL, *arr2=NULL, *oarr=NULL; + + if (!PyArg_ParseTuple(args, OOO&, &arg1, *arg2, + &PyArrayType, *out)) return NULL; + + arr1 = PyArray_FROM_OTF(arg1, NPY_DOUBLE, NPY_IN_ARRAY); + if (arr1 == NULL) return NULL; + arr2 = PyArray_FROM_OTF(arg2, NPY_DOUBLE, NPY_IN_ARRAY); + if (arr2 == NULL) goto fail; + oarr = PyArray_FROM_OTF(out, NPY_DOUBLE, NPY_INOUT_ARRAY); + if (oarr == NULL) goto fail; + + /* code that makes use of arguments */ + /* You will probably need at least + nd = PyArray_NDIM(<..>) -- number of dimensions + dims = PyArray_DIMS(<..>) -- npy_intp array of length nd + showing length in each dim. + dptr = (double *)PyArray_DATA(<..>) -- pointer to data. + + If an error occurs goto fail. 
+ */ + + Py_DECREF(arr1); + Py_DECREF(arr2); + Py_DECREF(oarr); + Py_INCREF(Py_None); + return Py_None; + + fail: + Py_XDECREF(arr1); + Py_XDECREF(arr2); + PyArray_XDECREF_ERR(oarr); + return NULL; + } diff --git a/doc/source/user/c-info.python-as-glue.rst b/doc/source/user/c-info.python-as-glue.rst new file mode 100644 index 000000000..0e0c73cd8 --- /dev/null +++ b/doc/source/user/c-info.python-as-glue.rst @@ -0,0 +1,1523 @@ +******************** +Using Python as glue +******************** + +| There is no conversation more boring than the one where everybody +| agrees. +| --- *Michel de Montaigne* + +| Duct tape is like the force. It has a light side, and a dark side, and +| it holds the universe together. +| --- *Carl Zwanzig* + +Many people like to say that Python is a fantastic glue language. +Hopefully, this Chapter will convince you that this is true. The first +adopters of Python for science were typically people who used it to +glue together large applicaton codes running on super-computers. Not +only was it much nicer to code in Python than in a shell script or +Perl, in addition, the ability to easily extend Python made it +relatively easy to create new classes and types specifically adapted +to the problems being solved. From the interactions of these early +contributors, Numeric emerged as an array-like object that could be +used to pass data between these applications. + +As Numeric has matured and developed into NumPy, people have been able +to write more code directly in NumPy. Often this code is fast-enough +for production use, but there are still times that there is a need to +access compiled code. Either to get that last bit of efficiency out of +the algorithm or to make it easier to access widely-available codes +written in C/C++ or Fortran. + +This chapter will review many of the tools that are available for the +purpose of accessing code written in other compiled languages. 
There +are many resources available for learning to call other compiled +libraries from Python and the purpose of this Chapter is not to make +you an expert. The main goal is to make you aware of some of the +possibilities so that you will know what to "Google" in order to learn more. + +The http://www.scipy.org website also contains a great deal of useful +information about many of these tools. For example, there is a nice +description of using several of the tools explained in this chapter at +http://www.scipy.org/PerformancePython. This link provides several +ways to solve the same problem showing how to use and connect with +compiled code to get the best performance. In the process you can get +a taste for several of the approaches that will be discussed in this +chapter. + + +Calling other compiled libraries from Python +============================================ + +While Python is a great language and a pleasure to code in, its +dynamic nature results in overhead that can cause some code ( *i.e.* +raw computations inside of for loops) to be up 10-100 times slower +than equivalent code written in a static compiled language. In +addition, it can cause memory usage to be larger than necessary as +temporary arrays are created and destroyed during computation. For +many types of computing needs the extra slow-down and memory +consumption can often not be spared (at least for time- or memory- +critical portions of your code). Therefore one of the most common +needs is to call out from Python code to a fast, machine-code routine +(e.g. compiled using C/C++ or Fortran). The fact that this is +relatively easy to do is a big reason why Python is such an excellent +high-level language for scientific and engineering programming. 
There are two basic approaches
This can vary somewhat between platforms and
code-block:: none + + C + SUBROUTINE ZADD(A,B,C,N) + C + DOUBLE COMPLEX A(*) + DOUBLE COMPLEX B(*) + DOUBLE COMPLEX C(*) + INTEGER N + DO 20 J = 1, N + C(J) = A(J)+B(J) + 20 CONTINUE + END + +This routine simply adds the elements in two contiguous arrays and +places the result in a third. The memory for all three arrays must be +provided by the calling routine. A very basic interface to this +routine can be automatically generated by f2py:: + + f2py -m add add.f + +You should be able to run this command assuming your search-path is +set-up properly. This command will produce an extension module named +addmodule.c in the current directory. This extension module can now be +compiled and used from Python just like any other extension module. + + +Creating a compiled extension module +------------------------------------ + +You can also get f2py to compile add.f and also compile its produced +extension module leaving only a shared-library extension file that can +be imported from Python:: + + f2py -c -m add add.f + +This command leaves a file named add.{ext} in the current directory +(where {ext} is the appropriate extension for a python extension +module on your platform --- so, pyd, *etc.* ). This module may then be +imported from Python. It will contain a method for each subroutin in +add (zadd, cadd, dadd, sadd). The docstring of each method contains +information about how the module method may be called: + + >>> import add + >>> print add.zadd.__doc__ + zadd - Function signature: + zadd(a,b,c,n) + Required arguments: + a : input rank-1 array('D') with bounds (*) + b : input rank-1 array('D') with bounds (*) + c : input rank-1 array('D') with bounds (*) + n : input int + + +Improving the basic interface +----------------------------- + +The default interface is a very literal translation of the fortran +code into Python. The Fortran array arguments must now be NumPy arrays +and the integer argument should be an integer. 
The interface will +attempt to convert all arguments to their required types (and shapes) +and issue an error if unsuccessful. However, because it knows nothing +about the semantics of the arguments (such that C is an output and n +should really match the array sizes), it is possible to abuse this +function in ways that can cause Python to crash. For example: + + >>> add.zadd([1,2,3],[1,2],[3,4],1000) + +will cause a program crash on most systems. Under the covers, the +lists are being converted to proper arrays but then the underlying add +loop is told to cycle way beyond the borders of the allocated memory. + +In order to improve the interface, directives should be provided. This +is accomplished by constructing an interface definition file. It is +usually best to start from the interface file that f2py can produce +(where it gets its default behavior from). To get f2py to generate the +interface file use the -h option:: + + f2py -h add.pyf -m add add.f + +This command leaves the file add.pyf in the current directory. The +section of this file corresponding to zadd is: + +.. code-block:: none + + subroutine zadd(a,b,c,n) ! in :add:add.f + double complex dimension(*) :: a + double complex dimension(*) :: b + double complex dimension(*) :: c + integer :: n + end subroutine zadd + +By placing intent directives and checking code, the interface can be +cleaned up quite a bit until the Python module method is both easier +to use and more robust. + +.. code-block:: none + + subroutine zadd(a,b,c,n) ! in :add:add.f + double complex dimension(n) :: a + double complex dimension(n) :: b + double complex intent(out),dimension(n) :: c + integer intent(hide),depend(a) :: n=len(a) + end subroutine zadd + +The intent directive, intent(out) is used to tell f2py that ``c`` is +an output variable and should be created by the interface before being +passed to the underlying code. 
The intent(hide) directive tells f2py +to not allow the user to specify the variable, ``n``, but instead to +get it from the size of ``a``. The depend( ``a`` ) directive is +necessary to tell f2py that the value of n depends on the input a (so +that it won't try to create the variable n until the variable a is +created). + +The new interface has docstring: + + >>> print add.zadd.__doc__ + zadd - Function signature: + c = zadd(a,b) + Required arguments: + a : input rank-1 array('D') with bounds (n) + b : input rank-1 array('D') with bounds (n) + Return objects: + c : rank-1 array('D') with bounds (n) + +Now, the function can be called in a much more robust way: + + >>> add.zadd([1,2,3],[4,5,6]) + array([ 5.+0.j, 7.+0.j, 9.+0.j]) + +Notice the automatic conversion to the correct format that occurred. + + +Inserting directives in Fortran source +-------------------------------------- + +The nice interface can also be generated automatically by placing the +variable directives as special comments in the original fortran code. +Thus, if I modify the source code to contain: + +.. code-block:: none + + C + SUBROUTINE ZADD(A,B,C,N) + C + CF2PY INTENT(OUT) :: C + CF2PY INTENT(HIDE) :: N + CF2PY DOUBLE COMPLEX :: A(N) + CF2PY DOUBLE COMPLEX :: B(N) + CF2PY DOUBLE COMPLEX :: C(N) + DOUBLE COMPLEX A(*) + DOUBLE COMPLEX B(*) + DOUBLE COMPLEX C(*) + INTEGER N + DO 20 J = 1, N + C(J) = A(J) + B(J) + 20 CONTINUE + END + +Then, I can compile the extension module using:: + + f2py -c -m add add.f + +The resulting signature for the function add.zadd is exactly the same +one that was created previously. If the original source code had +contained A(N) instead of A(\*) and so forth with B and C, then I +could obtain (nearly) the same interface simply by placing the +INTENT(OUT) :: C comment line in the source code. The only difference +is that N would be an optional input that would default to the length +of A. 
+ + +A filtering example +------------------- + +For comparison with the other methods to be discussed. Here is another +example of a function that filters a two-dimensional array of double +precision floating-point numbers using a fixed averaging filter. The +advantage of using Fortran to index into multi-dimensional arrays +should be clear from this example. + +.. code-block:: none + + SUBROUTINE DFILTER2D(A,B,M,N) + C + DOUBLE PRECISION A(M,N) + DOUBLE PRECISION B(M,N) + INTEGER N, M + CF2PY INTENT(OUT) :: B + CF2PY INTENT(HIDE) :: N + CF2PY INTENT(HIDE) :: M + DO 20 I = 2,M-1 + DO 40 J=2,N-1 + B(I,J) = A(I,J) + + $ (A(I-1,J)+A(I+1,J) + + $ A(I,J-1)+A(I,J+1) )*0.5D0 + + $ (A(I-1,J-1) + A(I-1,J+1) + + $ A(I+1,J-1) + A(I+1,J+1))*0.25D0 + 40 CONTINUE + 20 CONTINUE + END + +This code can be compiled and linked into an extension module named +filter using:: + + f2py -c -m filter filter.f + +This will produce an extension module named filter.so in the current +directory with a method named dfilter2d that returns a filtered +version of the input. + + +Calling f2py from Python +------------------------ + +The f2py program is written in Python and can be run from inside your +module. This provides a facility that is somewhat similar to the use +of weave.ext_tools described below. An example of the final interface +executed using Python code is: + +.. code-block:: python + + import numpy.f2py as f2py + fid = open('add.f') + source = fid.read() + fid.close() + f2py.compile(source, modulename='add') + import add + +The source string can be any valid Fortran code. If you want to save +the extension-module source code then a suitable file-name can be +provided by the source_fn keyword to the compile function. + + +Automatic extension module generation +------------------------------------- + +If you want to distribute your f2py extension module, then you only +need to include the .pyf file and the Fortran code. 
The distutils +extensions in NumPy allow you to define an extension module entirely +in terms of this interface file. A valid setup.py file allowing +distribution of the add.f module (as part of the package f2py_examples +so that it would be loaded as f2py_examples.add) is: + +.. code-block:: python + + def configuration(parent_package='', top_path=None) + from numpy.distutils.misc_util import Configuration + config = Configuration('f2py_examples',parent_package, top_path) + config.add_extension('add', sources=['add.pyf','add.f']) + return config + + if __name__ == '__main__': + from numpy.distutils.core import setup + setup(**configuration(top_path='').todict()) + +Installation of the new package is easy using:: + + python setup.py install + +assuming you have the proper permissions to write to the main site- +packages directory for the version of Python you are using. For the +resulting package to work, you need to create a file named __init__.py +(in the same directory as add.pyf). Notice the extension module is +defined entirely in terms of the "add.pyf" and "add.f" files. The +conversion of the .pyf file to a .c file is handled by numpy.disutils. + + +Conclusion +---------- + +The interface definition file (.pyf) is how you can fine-tune the +interface between Python and Fortran. There is decent documentation +for f2py found in the numpy/f2py/docs directory where-ever NumPy is +installed on your system (usually under site-packages). There is also +more information on using f2py (including how to use it to wrap C +codes) at http://www.scipy.org/Cookbook under the "Using NumPy with +Other Languages" heading. + +The f2py method of linking compiled code is currently the most +sophisticated and integrated approach. It allows clean separation of +Python with compiled code while still allowing for separate +distribution of the extension module. The only draw-back is that it +requires the existence of a Fortran compiler in order for a user to +install the code. 
However, with the existence of the free-compilers +g77, gfortran, and g95, as well as high-quality commerical compilers, +this restriction is not particularly onerous. In my opinion, Fortran +is still the easiest way to write fast and clear code for scientific +computing. It handles complex numbers, and multi-dimensional indexing +in the most straightforward way. Be aware, however, that some Fortran +compilers will not be able to optimize code as well as good hand- +written C-code. + +.. index:: + single: f2py + + +weave +===== + +Weave is a scipy package that can be used to automate the process of +extending Python with C/C++ code. It can be used to speed up +evaluation of an array expression that would otherwise create +temporary variables, to directly "inline" C/C++ code into Python, or +to create a fully-named extension module. You must either install +scipy or get the weave package separately and install it using the +standard python setup.py install. You must also have a C/C++-compiler +installed and useable by Python distutils in order to use weave. + +.. index:: + single: weave + +Somewhat dated, but still useful documentation for weave can be found +at the link http://www.scipy/Weave. There are also many examples found +in the examples directory which is installed under the weave directory +in the place where weave is installed on your system. + + +Speed up code involving arrays (also see scipy.numexpr) +------------------------------------------------------- + +This is the easiest way to use weave and requires minimal changes to +your Python code. It involves placing quotes around the expression of +interest and calling weave.blitz. Weave will parse the code and +generate C++ code using Blitz C++ arrays. It will then compile the +code and catalog the shared library so that the next time this exact +string is asked for (and the array types are the same), the already- +compiled shared library will be loaded and used. 
Because Blitz makes +extensive use of C++ templating, it can take a long time to compile +the first time. After that, however, the code should evaluate more +quickly than the equivalent NumPy expression. This is especially true +if your array sizes are large and the expression would require NumPy +to create several temporaries. Only expressions involving basic +arithmetic operations and basic array slicing can be converted to +Blitz C++ code. + +For example, consider the expression:: + + d = 4*a + 5*a*b + 6*b*c + +where a, b, and c are all arrays of the same type and shape. When the +data-type is double-precision and the size is 1000x1000, this +expression takes about 0.5 seconds to compute on an 1.1Ghz AMD Athlon +machine. When this expression is executed instead using blitz: + +.. code-block:: python + + d = empty(a.shape, 'd'); weave.blitz(expr) + +execution time is only about 0.20 seconds (about 0.14 seconds spent in +weave and the rest in allocating space for d). Thus, we've sped up the +code by a factor of 2 using only a simnple command (weave.blitz). Your +mileage may vary, but factors of 2-8 speed-ups are possible with this +very simple technique. + +If you are interested in using weave in this way, then you should also +look at scipy.numexpr which is another similar way to speed up +expressions by eliminating the need for temporary variables. Using +numexpr does not require a C/C++ compiler. + + +Inline C-code +------------- + +Probably the most widely-used method of employing weave is to +"in-line" C/C++ code into Python in order to speed up a time-critical +section of Python code. In this method of using weave, you define a +string containing useful C-code and then pass it to the function +**weave.inline** ( ``code_string``, ``variables`` ), where +code_string is a string of valid C/C++ code and variables is a list of +variables that should be passed in from Python. 
The C/C++ code should
+refer to the variables with the same names as they are defined with in
+Python. If weave.inline should return anything then the special value
+return_val should be set to whatever object should be returned. The
+following example shows how to use weave on basic Python objects:
+
+.. code-block:: python
+
+    code = r"""
+    int i;
+    py::tuple results(2);
+    for (i=0; i<a.length(); i++) {
+         a[i] = i;
+    }
+    results[0] = 3.0;
+    results[1] = 4.0;
+    return_val = results;
+    """
+    a = [None]*10
+    res = weave.inline(code,['a'])
+
+The C++ code shown in the code string uses the name 'a' to refer to
+the Python list that is passed in. Because the Python List is a
+mutable type, the elements of the list itself are modified by the C++
+code. A set of C++ classes are used to access Python objects using
+simple syntax.
+
+The main advantage of using C-code, however, is to speed up processing
+on an array of data. Accessing a NumPy array in C++ code using weave,
+depends on what kind of type converter is chosen in going from NumPy
+arrays to C++ code. The default converter creates 5 variables for the
+C-code for every NumPy array passed in to weave.inline. The following
+table shows these variables which can all be used in the C++ code. The
+table assumes that ``myvar`` is the name of the array in Python with
+data-type {dtype} (i.e. float64, float32, int8, etc.) 
+ +=========== ============== ========================================= +Variable Type Contents +=========== ============== ========================================= +myvar {dtype}* Pointer to the first element of the array +Nmyvar npy_intp* A pointer to the dimensions array +Smyvar npy_intp* A pointer to the strides array +Dmyvar int The number of dimensions +myvar_array PyArrayObject* The entire structure for the array +=========== ============== ========================================= + +The in-lined code can contain references to any of these variables as +well as to the standard macros MYVAR1(i), MYVAR2(i,j), MYVAR3(i,j,k), +and MYVAR4(i,j,k,l). These name-based macros (they are the Python name +capitalized followed by the number of dimensions needed) will de- +reference the memory for the array at the given location with no error +checking (be-sure to use the correct macro and ensure the array is +aligned and in correct byte-swap order in order to get useful +results). The following code shows how you might use these variables +and macros to code a loop in C that computes a simple 2-d weighted +averaging filter. + +.. code-block:: c++ + + int i,j; + for(i=1;i<Na[0]-1;i++) { + for(j=1;j<Na[1]-1;j++) { + B2(i,j) = A2(i,j) + (A2(i-1,j) + + A2(i+1,j)+A2(i,j-1) + + A2(i,j+1))*0.5 + + (A2(i-1,j-1) + + A2(i-1,j+1) + + A2(i+1,j-1) + + A2(i+1,j+1))*0.25 + } + } + +The above code doesn't have any error checking and so could fail with +a Python crash if, ``a`` had the wrong number of dimensions, or ``b`` +did not have the same shape as ``a``. However, it could be placed +inside a standard Python function with the necessary error checking to +produce a robust but fast subroutine. + +One final note about weave.inline: if you have additional code you +want to include in the final extension module such as supporting +function calls, include statments, etc. 
you can pass this code in as a +string using the keyword support_code: ``weave.inline(code, variables, +support_code=support)``. If you need the extension module to link +against an additional library then you can also pass in +distutils-style keyword arguments such as library_dirs, libraries, +and/or runtime_library_dirs which point to the appropriate libraries +and directories. + +Simplify creation of an extension module +---------------------------------------- + +The inline function creates one extension module for each function to- +be inlined. It also generates a lot of intermediate code that is +duplicated for each extension module. If you have several related +codes to execute in C, it would be better to make them all separate +functions in a single extension module with multiple functions. You +can also use the tools weave provides to produce this larger extension +module. In fact, the weave.inline function just uses these more +general tools to do its work. + +The approach is to: + +1. construct a extension module object using + ext_tools.ext_module(``module_name``); + +2. create function objects using ext_tools.ext_function(``func_name``, + ``code``, ``variables``); + +3. (optional) add support code to the function using the + .customize.add_support_code( ``support_code`` ) method of the + function object; + +4. add the functions to the extension module object using the + .add_function(``func``) method; + +5. when all the functions are added, compile the extension with its + .compile() method. + +Several examples are available in the examples directory where weave +is installed on your system. Look particularly at ramp2.py, +increment_example.py and fibonacii.py + + +Conclusion +---------- + +Weave is a useful tool for quickly routines in C/C++ and linking them +into Python. It's caching-mechanism allows for on-the-fly compilation +which makes it particularly attractive for in-house code. 
Because of +the requirement that the user have a C++-compiler, it can be difficult +(but not impossible) to distribute a package that uses weave to other +users who don't have a compiler installed. Of course, weave could be +used to construct an extension module which is then distributed in the +normal way *(* using a setup.py file). While you can use weave to +build larger extension modules with many methods, creating methods +with a variable- number of arguments is not possible. Thus, for a more +sophisticated module, you will still probably want a Python-layer that +calls the weave-produced extension. + +.. index:: + single: weave + + +Pyrex +===== + +Pyrex is a way to write C-extension modules using Python-like syntax. +It is an interesting way to generate extension modules that is growing +in popularity, particularly among people who have rusty or non- +existent C-skills. It does require the user to write the "interface" +code and so is more time-consuming than SWIG or f2py if you are trying +to interface to a large library of code. However, if you are writing +an extension module that will include quite a bit of your own +algorithmic code, as well, then Pyrex is a good match. A big weakness +perhaps is the inability to easily and quickly access the elements of +a multidimensional array. + +.. index:: + single: pyrex + +Notice that Pyrex is an extension-module generator only. Unlike weave +or f2py, it includes no automatic facility for compiling and linking +the extension module (which must be done in the usual fashion). It +does provide a modified distutils class called build_ext which lets +you build an extension module from a .pyx source. Thus, you could +write in a setup.py file: + +.. 
code-block:: python
+
+    from Pyrex.Distutils import build_ext
+    from distutils.extension import Extension
+    from distutils.core import setup
+
+    import numpy
+    pyx_ext = Extension('mine', ['mine.pyx'],
+                        include_dirs=[numpy.get_include()])
+
+    setup(name='mine', description='Nothing',
+          ext_modules=[pyx_ext],
+          cmdclass = {'build_ext':build_ext})
+
+Adding the NumPy include directory is, of course, only necessary if
+you are using NumPy arrays in the extension module (which is what I
+assume you are using Pyrex for). The distutils extensions in NumPy
+also include support for automatically producing the extension-module
+and linking it from a ``.pyx`` file. It works so that if the user does
+not have Pyrex installed, then it looks for a file with the same
+file-name but a ``.c`` extension which it then uses instead of trying
+to produce the ``.c`` file again.
+
+Pyrex does not natively understand NumPy arrays. However, it is not
+difficult to include information that lets Pyrex deal with them
+usefully. In fact, the numpy.random.mtrand module was written using
+Pyrex so an example of Pyrex usage is already included in the NumPy
+source distribution. That experience led to the creation of a standard
+c_numpy.pxd file that you can use to simplify interacting with NumPy
+array objects in a Pyrex-written extension. The file may not be
+complete (it wasn't at the time of this writing). If you have
+additions you'd like to contribute, please send them. The file is
+located in the .../site-packages/numpy/doc/pyrex directory where you
+have Python installed. There is also an example in that directory of
+using Pyrex to construct a simple extension module. It shows that
+Pyrex looks a lot like Python but also contains some new syntax that
+is necessary in order to get C-like speed. 
Speed increases are +possible only when you use cdef to statically define C variables and +use a special construct to create for loops: + +.. code-block:: none + + cdef int i + for i from start <= i < stop + +Let's look at two examples we've seen before to see how they might be +implemented using Pyrex. These examples were compiled into extension +modules using Pyrex-0.9.3.1. + + +Pyrex-add +--------- + +Here is part of a Pyrex-file I named add.pyx which implements the add +functions we previously implemented using f2py: + +.. code-block:: none + + cimport c_numpy + from c_numpy cimport import_array, ndarray, npy_intp, npy_cdouble, \ + npy_cfloat, NPY_DOUBLE, NPY_CDOUBLE, NPY_FLOAT, \ + NPY_CFLOAT + + #We need to initialize NumPy + import_array() + + def zadd(object ao, object bo): + cdef ndarray c, a, b + cdef npy_intp i + a = c_numpy.PyArray_ContiguousFromAny(ao, + NPY_CDOUBLE, 1, 1) + b = c_numpy.PyArray_ContiguousFromAny(bo, + NPY_CDOUBLE, 1, 1) + c = c_numpy.PyArray_SimpleNew(a.nd, a.dimensions, + a.descr.type_num) + for i from 0 <= i < a.dimensions[0]: + (<npy_cdouble *>c.data)[i].real = \ + (<npy_cdouble *>a.data)[i].real + \ + (<npy_cdouble *>b.data)[i].real + (<npy_cdouble *>c.data)[i].imag = \ + (<npy_cdouble *>a.data)[i].imag + \ + (<npy_cdouble *>b.data)[i].imag + return c + +This module shows use of the ``cimport`` statement to load the +definitions from the c_numpy.pxd file. As shown, both versions of the +import statement are supported. It also shows use of the NumPy C-API +to construct NumPy arrays from arbitrary input objects. The array c is +created using PyArray_SimpleNew. Then the c-array is filled by +addition. Casting to a particiular data-type is accomplished using +<cast \*>. Pointers are de-referenced with bracket notation and +members of structures are accessed using '.' notation even if the +object is techinically a pointer to a structure. 
The use of the +special for loop construct ensures that the underlying code will have +a similar C-loop so the addition calculation will proceed quickly. +Notice that we have not checked for NULL after calling to the C-API +--- a cardinal sin when writing C-code. For routines that return +Python objects, Pyrex inserts the checks for NULL into the C-code for +you and returns with failure if need be. There is also a way to get +Pyrex to automatically check for exceptions when you call functions +that don't return Python objects. See the documentation of Pyrex for +details. + + +Pyrex-filter +------------ + +The two-dimensional example we created using weave is a bit uglierto +implement in Pyrex because two-dimensional indexing using Pyrex is not +as simple. But, it is straightforward (and possibly faster because of +pre-computed indices). Here is the Pyrex-file I named image.pyx. + +.. code-block:: none + + cimport c_numpy + from c_numpy cimport import_array, ndarray, npy_intp,\ + NPY_DOUBLE, NPY_CDOUBLE, \ + NPY_FLOAT, NPY_CFLOAT, NPY_ALIGNED \ + + #We need to initialize NumPy + import_array() + def filter(object ao): + cdef ndarray a, b + cdef npy_intp i, j, M, N, oS + cdef npy_intp r,rm1,rp1,c,cm1,cp1 + cdef double value + # Require an ALIGNED array + # (but not necessarily contiguous) + # We will use strides to access the elements. 
+ a = c_numpy.PyArray_FROMANY(ao, NPY_DOUBLE, \ + 2, 2, NPY_ALIGNED) + b = c_numpy.PyArray_SimpleNew(a.nd,a.dimensions, \ + a.descr.type_num) + M = a.dimensions[0] + N = a.dimensions[1] + S0 = a.strides[0] + S1 = a.strides[1] + for i from 1 <= i < M-1: + r = i*S0 + rm1 = r-S0 + rp1 = r+S0 + oS = i*N + for j from 1 <= j < N-1: + c = j*S1 + cm1 = c-S1 + cp1 = c+S1 + (<double *>b.data)[oS+j] = \ + (<double *>(a.data+r+c))[0] + \ + ((<double *>(a.data+rm1+c))[0] + \ + (<double *>(a.data+rp1+c))[0] + \ + (<double *>(a.data+r+cm1))[0] + \ + (<double *>(a.data+r+cp1))[0])*0.5 + \ + ((<double *>(a.data+rm1+cm1))[0] + \ + (<double *>(a.data+rp1+cm1))[0] + \ + (<double *>(a.data+rp1+cp1))[0] + \ + (<double *>(a.data+rm1+cp1))[0])*0.25 + return b + +This 2-d averaging filter runs quickly because the loop is in C and +the pointer computations are done only as needed. However, it is not +particularly easy to understand what is happening. A 2-d image, ``in`` +, can be filtered using this code very quickly using: + +.. code-block:: python + + import image + out = image.filter(in) + + +Conclusion +---------- + +There are several disadvantages of using Pyrex: + +1. The syntax for Pyrex can get a bit bulky, and it can be confusing at + first to understand what kind of objects you are getting and how to + interface them with C-like constructs. + +2. Inappropriate Pyrex syntax or incorrect calls to C-code or type- + mismatches can result in failures such as + + 1. Pyrex failing to generate the extension module source code, + + 2. Compiler failure while generating the extension module binary due to + incorrect C syntax, + + 3. Python failure when trying to use the module. + + +3. It is easy to lose a clean separation between Python and C which makes + re-using your C-code for other non-Python-related projects more + difficult. + +4. Multi-dimensional arrays are "bulky" to index (appropriate macros + may be able to fix this). + +5. 
The C-code generated by Prex is hard to read and modify (and typically + compiles with annoying but harmless warnings). + +Writing a good Pyrex extension module still takes a bit of effort +because not only does it require (a little) familiarity with C, but +also with Pyrex's brand of Python-mixed-with C. One big advantage of +Pyrex-generated extension modules is that they are easy to distribute +using distutils. In summary, Pyrex is a very capable tool for either +gluing C-code or generating an extension module quickly and should not +be over-looked. It is especially useful for people that can't or won't +write C-code or Fortran code. But, if you are already able to write +simple subroutines in C or Fortran, then I would use one of the other +approaches such as f2py (for Fortran), ctypes (for C shared- +libraries), or weave (for inline C-code). + +.. index:: + single: pyrex + + + + +ctypes +====== + +Ctypes is a python extension module (downloaded separately for Python +<2.5 and included with Python 2.5) that allows you to call an +arbitrary function in a shared library directly from Python. This +approach allows you to interface with C-code directly from Python. +This opens up an enormous number of libraries for use from Python. The +drawback, however, is that coding mistakes can lead to ugly program +crashes very easily (just as can happen in C) because there is little +type or bounds checking done on the parameters. This is especially +true when array data is passed in as a pointer to a raw memory +location. The responsibility is then on you that the subroutine will +not access memory outside the actual array area. But, if you don't +mind living a little dangerously ctypes can be an effective tool for +quickly taking advantage of a large shared library (or writing +extended functionality in your own shared library). + +.. 
index:: + single: ctypes + +Because the ctypes approach exposes a raw interface to the compiled +code it is not always tolerant of user mistakes. Robust use of the +ctypes module typically involves an additional layer of Python code in +order to check the data types and array bounds of objects passed to +the underlying subroutine. This additional layer of checking (not to +mention the conversion from ctypes objects to C-data-types that ctypes +itself performs), will make the interface slower than a hand-written +extension-module interface. However, this overhead should be neglible +if the C-routine being called is doing any significant amount of work. +If you are a great Python programmer with weak C-skills, ctypes is an +easy way to write a useful interface to a (shared) library of compiled +code. + +To use c-types you must + +1. Have a shared library. + +2. Load the shared library. + +3. Convert the python objects to ctypes-understood arguments. + +4. Call the function from the library with the ctypes arguments. + + +Having a shared library +----------------------- + +There are several requirements for a shared library that can be used +with c-types that are platform specific. This guide assumes you have +some familiarity with making a shared library on your system (or +simply have a shared library available to you). Items to remember are: + +- A shared library must be compiled in a special way ( *e.g.* using + the -shared flag with gcc). + +- On some platforms (*e.g.* Windows) , a shared library requires a + .def file that specifies the functions to be exported. For example a + mylib.def file might contain. + + :: + + LIBRARY mylib.dll + EXPORTS + cool_function1 + cool_function2 + + Alternatively, you may be able to use the storage-class specifier + __declspec(dllexport) in the C-definition of the function to avoid the + need for this .def file. 
+ +There is no standard way in Python distutils to create a standard +shared library (an extension module is a "special" shared library +Python understands) in a cross-platform manner. Thus, a big +disadvantage of ctypes at the time of writing this book is that it is +difficult to distribute in a cross-platform manner a Python extension +that uses c-types and includes your own code which should be compiled +as a shared library on the users system. + + +Loading the shared library +-------------------------- + +A simple, but robust way to load the shared library is to get the +absolute path name and load it using the cdll object of ctypes.: + +.. code-block:: python + + lib = ctypes.cdll[<full_path_name>] + +However, on Windows accessing an attribute of the cdll method will +load the first DLL by that name found in the current directory or on +the PATH. Loading the absolute path name requires a little finesse for +cross-platform work since the extension of shared libraries varies. +There is a ``ctypes.util.find_library`` utility available that can +simplify the process of finding the library to load but it is not +foolproof. Complicating matters, different platforms have different +default extensions used by shared libraries (e.g. .dll -- Windows, .so +-- Linux, .dylib -- Mac OS X). This must also be taken into account if +you are using c-types to wrap code that needs to work on several +platforms. + +NumPy provides a convenience function called +:func:`ctypeslib.load_library` (name, path). This function takes the name +of the shared library (including any prefix like 'lib' but excluding +the extension) and a path where the shared library can be located. It +returns a ctypes library object or raises an OSError if the library +cannot be found or raises an ImportError if the ctypes module is not +available. (Windows users: the ctypes library object loaded using +:func:`load_library` is always loaded assuming cdecl calling convention. 
+See the ctypes documentation under ctypes.windll and/or ctypes.oledll +for ways to load libraries under other calling conventions). + +The functions in the shared library are available as attributes of the +ctypes library object (returned from :func:`ctypeslib.load_library`) or +as items using ``lib['func_name']`` syntax. The latter method for +retrieving a function name is particularly useful if the function name +contains characters that are not allowable in Python variable names. + + +Converting arguments +-------------------- + +Python ints/longs, strings, and unicode objects are automatically +converted as needed to equivalent c-types arguments The None object is +also converted automatically to a NULL pointer. All other Python +objects must be converted to ctypes-specific types. There are two ways +around this restriction that allow c-types to integrate with other +objects. + +1. Don't set the argtypes attribute of the function object and define an + :obj:`_as_parameter_` method for the object you want to pass in. The + :obj:`_as_parameter_` method must return a Python int which will be passed + directly to the function. + +2. Set the argtypes attribute to a list whose entries contain objects + with a classmethod named from_param that knows how to convert your + object to an object that ctypes can understand (an int/long, string, + unicode, or object with the :obj:`_as_parameter_` attribute). + +NumPy uses both methods with a preference for the second method +because it can be safer. The ctypes attribute of the ndarray returns +an object that has an _as_parameter\_ attribute which returns an +integer representing the address of the ndarray to which it is +associated. As a result, one can pass this ctypes attribute object +directly to a function expecting a pointer to the data in your +ndarray. 
The caller must be sure that the ndarray object is of the +correct type, shape, and has the correct flags set or risk nasty +crashes if the data-pointer to inappropriate arrays are passsed in. + +To implement the second method, NumPy provides the class-factory +function :func:`ndpointer` in the :mod:`ctypeslib` module. This +class-factory function produces an appropriate class that can be +placed in an argtypes attribute entry of a ctypes function. The class +will contain a from_param method which ctypes will use to convert any +ndarray passed in to the function to a ctypes-recognized object. In +the process, the conversion will perform checking on any properties of +the ndarray that were specified by the user in the call to :func:`ndpointer`. +Aspects of the ndarray that can be checked include the data-type, the +number-of-dimensions, the shape, and/or the state of the flags on any +array passed. The return value of the from_param method is the ctypes +attribute of the array which (because it contains the _as_parameter\_ +attribute pointing to the array data area) can be used by ctypes +directly. + +The ctypes attribute of an ndarray is also endowed with additional +attributes that may be convenient when passing additional information +about the array into a ctypes function. The attributes **data**, +**shape**, and **strides** can provide c-types compatible types +corresponding to the data-area, the shape, and the strides of the +array. The data attribute reutrns a ``c_void_p`` representing a +pointer to the data area. The shape and strides attributes each return +an array of ctypes integers (or None representing a NULL pointer, if a +0-d array). The base ctype of the array is a ctype integer of the same +size as a pointer on the platform. There are also methods +data_as({ctype}), shape_as(<base ctype>), and strides_as(<base +ctype>). These return the data as a ctype object of your choice and +the shape/strides arrays using an underlying base type of your choice. 
+For convenience, the **ctypeslib** module also contains **c_intp** as +a ctypes integer data-type whose size is the same as the size of +``c_void_p`` on the platform (it's value is None if ctypes is not +installed). + + +Calling the function +-------------------- + +The function is accessed as an attribute of or an item from the loaded +shared-library. Thus, if "./mylib.so" has a function named +"cool_function1" , I could access this function either as: + +.. code-block:: python + + lib = numpy.ctypeslib.load_library('mylib','.') + func1 = lib.cool_function1 # or equivalently + func1 = lib['cool_function1'] + +In ctypes, the return-value of a function is set to be 'int' by +default. This behavior can be changed by setting the restype attribute +of the function. Use None for the restype if the function has no +return value ('void'): + +.. code-block:: python + + func1.restype = None + +As previously discussed, you can also set the argtypes attribute of +the function in order to have ctypes check the types of the input +arguments when the function is called. Use the :func:`ndpointer` factory +function to generate a ready-made class for data-type, shape, and +flags checking on your new function. The :func:`ndpointer` function has the +signature + +.. function:: ndpointer(dtype=None, ndim=None, shape=None, flags=None) + + Keyword arguments with the value ``None`` are not checked. + Specifying a keyword enforces checking of that aspect of the + ndarray on conversion to a ctypes-compatible object. The dtype + keyword can be any object understood as a data-type object. The + ndim keyword should be an integer, and the shape keyword should be + an integer or a sequence of integers. The flags keyword specifies + the minimal flags that are required on any array passed in. 
This + can be specified as a string of comma separated requirements, an + integer indicating the requirement bits OR'd together, or a flags + object returned from the flags attribute of an array with the + necessary requirements. + +Using an ndpointer class in the argtypes method can make it +significantly safer to call a C-function using ctypes and the data- +area of an ndarray. You may still want to wrap the function in an +additional Python wrapper to make it user-friendly (hiding some +obvious arguments and making some arguments output arguments). In this +process, the **requires** function in NumPy may be useful to return the right kind of array from +a given input. + + +Complete example +---------------- + +In this example, I will show how the addition function and the filter +function implemented previously using the other approaches can be +implemented using ctypes. First, the C-code which implements the +algorithms contains the functions zadd, dadd, sadd, cadd, and +dfilter2d. The zadd function is: + +.. code-block:: c + + /* Add arrays of contiguous data */ + typedef struct {double real; double imag;} cdouble; + typedef struct {float real; float imag;} cfloat; + void zadd(cdouble *a, cdouble *b, cdouble *c, long n) + { + while (n--) { + c->real = a->real + b->real; + c->imag = a->imag + b->imag; + a++; b++; c++; + } + } + +with similar code for cadd, dadd, and sadd that handles complex float, +double, and float data-types, respectively: + +.. code-block:: c + + void cadd(cfloat *a, cfloat *b, cfloat *c, long n) + { + while (n--) { + c->real = a->real + b->real; + c->imag = a->imag + b->imag; + a++; b++; c++; + } + } + void dadd(double *a, double *b, double *c, long n) + { + while (n--) { + *c++ = *a++ + *b++; + } + } + void sadd(float *a, float *b, float *c, long n) + { + while (n--) { + *c++ = *a++ + *b++; + } + } + +The code.c file also contains the function dfilter2d: + +.. 
code-block:: c
+
+    /* Assumes b is contiguous and
+       a has strides that are multiples of sizeof(double)
+    */
+    void
+    dfilter2d(double *a, double *b, int *astrides, int *dims)
+    {
+        int i, j, M, N, S0, S1;
+        int r, c, rm1, rp1, cp1, cm1;
+
+        M = dims[0]; N = dims[1];
+        S0 = astrides[0]/sizeof(double);
+        S1=astrides[1]/sizeof(double);
+        for (i=1; i<M-1; i++) {
+            r = i*S0; rp1 = r+S0; rm1 = r-S0;
+            for (j=1; j<N-1; j++) {
+                c = j*S1; cp1 = c+S1; cm1 = c-S1;
+                b[i*N+j] = a[r+c] +                 \
+                    (a[rp1+c] + a[rm1+c] +          \
+                     a[r+cp1] + a[r+cm1])*0.5 +     \
+                    (a[rp1+cp1] + a[rp1+cm1] +      \
+                     a[rm1+cp1] + a[rm1+cm1])*0.25;
+            }
+        }
+    }
+
+A possible advantage this code has over the Fortran-equivalent code is
+that it takes arbitrarily strided (i.e. non-contiguous arrays) and may
+also run faster depending on the optimization capability of your
+compiler. But, it is obviously more complicated than the simple code
+in filter.f. This code must be compiled into a shared library. On my
+Linux system this is accomplished using::
+
+    gcc -o code.so -shared code.c
+
+Which creates a shared library named code.so in the current directory.
+On Windows don't forget to either add __declspec(dllexport) in front
+of void on the line preceding each function definition, or write a
+code.def file that lists the names of the functions to be exported.
+
+A suitable Python interface to this shared library should be
+constructed. To do this create a file named interface.py with the
+following lines at the top:
+
+.. 
code-block:: python
+
+    __all__ = ['add', 'filter2d']
+
+    import numpy as N
+    import os
+
+    _path = os.path.dirname(__file__)
+    lib = N.ctypeslib.load_library('code', _path)
+    _typedict = {'zadd' : complex, 'sadd' : N.single,
+                 'cadd' : N.csingle, 'dadd' : float}
+    for name in _typedict.keys():
+        val = getattr(lib, name)
+        val.restype = None
+        _type = _typedict[name]
+        val.argtypes = [N.ctypeslib.ndpointer(_type,
+                              flags='aligned, contiguous'),
+                        N.ctypeslib.ndpointer(_type,
+                              flags='aligned, contiguous'),
+                        N.ctypeslib.ndpointer(_type,
+                              flags='aligned, contiguous,'\
+                                    'writeable'),
+                        N.ctypeslib.c_intp]
+
+This code loads the shared library named code.{ext} located in the
+same path as this file. It then adds a return type of void to the
+functions contained in the library. It also adds argument checking to
+the functions in the library so that ndarrays can be passed as the
+first three arguments along with an integer (large enough to hold a
+pointer on the platform) as the fourth argument.
+
+Setting up the filtering function is similar and allows the filtering
+function to be called with ndarray arguments as the first two
+arguments and with pointers to integers (large enough to handle the
+strides and shape of an ndarray) as the last two arguments:
+
+.. code-block:: python
+
+    lib.dfilter2d.restype=None
+    lib.dfilter2d.argtypes = [N.ctypeslib.ndpointer(float, ndim=2,
+                                       flags='aligned'),
+                              N.ctypeslib.ndpointer(float, ndim=2,
+                                       flags='aligned, contiguous,'\
+                                             'writeable'),
+                              ctypes.POINTER(N.ctypeslib.c_intp),
+                              ctypes.POINTER(N.ctypeslib.c_intp)]
+
+Next, define a simple selection function that chooses which addition
+function to call in the shared library based on the data-type:
+
+.. 
code-block:: python + + def select(dtype): + if dtype.char in '?bBhHf': + return lib.sadd, N.single + elif dtype.char == 'F': + return lib.cadd, N.csingle + elif dtype.char in 'DG': + return lib.zadd, complex + else: + return lib.dadd, float + +Finally, the two functions to be exported by the interface can be +written simply as: + +.. code-block:: python + + def add(a, b): + requires = ['CONTIGUOUS', 'ALIGNED'] + a = N.asanyarray(a) + func, dtype = select(a.dtype) + a = N.require(a, dtype, requires) + b = N.require(b, dtype, requires) + c = N.empty_like(a) + func(a,b,c,a.size) + return c + +and: + +.. code-block:: python + + def filter2d(a): + a = N.require(a, float, ['ALIGNED']) + b = N.zeros_like(a) + lib.dfilter2d(a, b, a.ctypes.strides, a.ctypes.shape) + return b + + +Conclusion +---------- + +.. index:: + single: ctypes + +Using ctypes is a powerful way to connect Python with arbitrary +C-code. Its advantages for extending Python include + +- clean separation of C-code from Python code + + - no need to learn a new syntax except Python and C + + - allows re-use of C-code + + - functionality in shared libraries written for other purposes can be + obtained with a simple Python wrapper and search for the library. + + +- easy integration with NumPy through the ctypes attribute + +- full argument checking with the ndpointer class factory + +Its disadvantages include + +- It is difficult to distribute an extension module made using ctypes + because of a lack of support for building shared libraries in + distutils (but I suspect this will change in time). + +- You must have shared-libraries of your code (no static libraries). + +- Very little support for C++ code and its different library-calling + conventions. You will probably need a C-wrapper around C++ code to use + with ctypes (or just use Boost.Python instead). 
+ +Because of the difficulty in distributing an extension module made +using ctypes, f2py is still the easiest way to extend Python for +package creation. However, ctypes is a close second and will probably +be growing in popularity now that it is part of the Python +distribution. This should bring more features to ctypes that should +eliminate the difficulty in extending Python and distributing the +extension using ctypes. + + +Additional tools you may find useful +==================================== + +These tools have been found useful by others using Python and so are +included here. They are discussed separately because I see them as +either older ways to do things more modernly handled by f2py, weave, +Pyrex, or ctypes (SWIG, PyFort, PyInline) or because I don't know much +about them (SIP, Boost, Instant). I have not added links to these +methods because my experience is that you can find the most relevant +link faster using Google or some other search engine, and any links +provided here would be quickly dated. Do not assume that just because +it is included in this list, I don't think the package deserves your +attention. I'm including information about these packages because many +people have found them useful and I'd like to give you as many options +as possible for tackling the problem of easily integrating your code. + + +SWIG +---- + +.. index:: + single: swig + +Simplified Wrapper and Interface Generator (SWIG) is an old and fairly +stable method for wrapping C/C++-libraries to a large variety of other +languages. It does not specifically understand NumPy arrays but can be +made useable with NumPy through the use of typemaps. There are some +sample typemaps in the numpy/doc/swig directory under numpy.i along +with an example module that makes use of them. SWIG excels at wrapping +large C/C++ libraries because it can (almost) parse their headers and +auto-produce an interface. 
Technically, you need to generate a ``.i`` +file that defines the interface. Often, however, this ``.i`` file can +be parts of the header itself. The interface usually needs a bit of +tweaking to be very useful. This ability to parse C/C++ headers and +auto-generate the interface still makes SWIG a useful approach to +adding functionalilty from C/C++ into Python, despite the other +methods that have emerged that are more targeted to Python. SWIG can +actually target extensions for several languages, but the typemaps +usually have to be language-specific. Nonetheless, with modifications +to the Python-specific typemaps, SWIG can be used to interface a +library with other languages such as Perl, Tcl, and Ruby. + +My experience with SWIG has been generally positive in that it is +relatively easy to use and quite powerful. I used to use it quite +often before becoming more proficient at writing C-extensions. +However, I struggled writing custom interfaces with SWIG because it +must be done using the concept of typemaps which are not Python +specific and are written in a C-like syntax. Therefore, I tend to +prefer other gluing strategies and would only attempt to use SWIG to +wrap a very-large C/C++ library. Nonetheless, there are others who use +SWIG quite happily. + + +SIP +--- + +.. index:: + single: SIP + +SIP is another tool for wrapping C/C++ libraries that is Python +specific and appears to have very good support for C++. Riverbank +Computing developed SIP in order to create Python bindings to the QT +library. An interface file must be written to generate the binding, +but the interface file looks a lot like a C/C++ header file. While SIP +is not a full C++ parser, it understands quite a bit of C++ syntax as +well as its own special directives that allow modification of how the +Python binding is accomplished. It also allows the user to define +mappings between Python types and C/C++ structrues and classes. + + +Boost Python +------------ + +.. 
index:: + single: Boost.Python + +Boost is a repository of C++ libraries and Boost.Python is one of +those libraries which provides a concise interface for binding C++ +classes and functions to Python. The amazing part of the Boost.Python +approach is that it works entirely in pure C++ without introducing a +new syntax. Many users of C++ report that Boost.Python makes it +possible to combine the best of both worlds in a seamless fashion. I +have not used Boost.Python because I am not a big user of C++ and +using Boost to wrap simple C-subroutines is usually over-kill. It's +primary purpose is to make C++ classes available in Python. So, if you +have a set of C++ classes that need to be integrated cleanly into +Python, consider learning about and using Boost.Python. + + +Instant +------- + +.. index:: + single: Instant + +This is a relatively new package (called pyinstant at sourceforge) +that builds on top of SWIG to make it easy to inline C and C++ code in +Python very much like weave. However, Instant builds extension modules +on the fly with specific module names and specific method names. In +this repsect it is more more like f2py in its behavior. The extension +modules are built on-the fly (as long as the SWIG is installed). They +can then be imported. Here is an example of using Instant with NumPy +arrays (adapted from the test2 included in the Instant distribution): + +.. 
code-block:: python + + code=""" + PyObject* add(PyObject* a_, PyObject* b_){ + /* + various checks + */ + PyArrayObject* a=(PyArrayObject*) a_; + PyArrayObject* b=(PyArrayObject*) b_; + int n = a->dimensions[0]; + int dims[1]; + dims[0] = n; + PyArrayObject* ret; + ret = (PyArrayObject*) PyArray_FromDims(1, dims, NPY_DOUBLE); + int i; + char *aj=a->data; + char *bj=b->data; + double *retj = (double *)ret->data; + for (i=0; i < n; i++) { + *retj++ = *((double *)aj) + *((double *)bj); + aj += a->strides[0]; + bj += b->strides[0]; + } + return (PyObject *)ret; + } + """ + import Instant, numpy + ext = Instant.Instant() + ext.create_extension(code=code, headers=["numpy/arrayobject.h"], + include_dirs=[numpy.get_include()], + init_code='import_array();', module="test2b_ext") + import test2b_ext + a = numpy.arange(1000) + b = numpy.arange(1000) + d = test2b_ext.add(a,b) + +Except perhaps for the dependence on SWIG, Instant is a +straightforward utility for writing extension modules. + + +PyInline +-------- + +This is a much older module that allows automatic building of +extension modules so that C-code can be included with Python code. +Its latest release (version 0.03) was in 2001, and it appears that it +is not being updated. + + +PyFort +------ + +PyFort is a nice tool for wrapping Fortran and Fortran-like C-code +into Python with support for Numeric arrays. It was written by Paul +Dubois, a distinguished computer scientist and the very first +maintainer of Numeric (now retired). It is worth mentioning in the +hopes that somebody will update PyFort to work with NumPy arrays as +well which now support either Fortran or C-style contiguous arrays. diff --git a/doc/source/user/c-info.rst b/doc/source/user/c-info.rst new file mode 100644 index 000000000..086f97c8d --- /dev/null +++ b/doc/source/user/c-info.rst @@ -0,0 +1,9 @@ +################# +Using Numpy C-API +################# + +.. 
toctree:: + + c-info.how-to-extend + c-info.python-as-glue + c-info.beyond-basics diff --git a/doc/source/user/howtofind.rst b/doc/source/user/howtofind.rst new file mode 100644 index 000000000..5f6b49012 --- /dev/null +++ b/doc/source/user/howtofind.rst @@ -0,0 +1,9 @@ +************************* +How to find documentation +************************* + +.. seealso:: :ref:`Numpy-specific help functions <routines.help>` + +.. note:: XXX: this part is not yet written. + +.. automodule:: numpy.doc.howtofind diff --git a/doc/source/user/index.rst b/doc/source/user/index.rst new file mode 100644 index 000000000..750062d50 --- /dev/null +++ b/doc/source/user/index.rst @@ -0,0 +1,27 @@ +.. _user: + +################ +Numpy User Guide +################ + +This guide explains how to make use of different features +of Numpy. For a detailed documentation about different functions +and classes, see :ref:`reference`. + +.. warning:: + + This "User Guide" is still very much work in progress; the material + is not organized, and many aspects of Numpy are not covered. + + More documentation for Numpy can be found on the + `scipy.org <http://www.scipy.org/Documentation>`__ website. + +.. toctree:: + :maxdepth: 2 + + howtofind + basics + performance + misc + c-info + diff --git a/doc/source/user/misc.rst b/doc/source/user/misc.rst new file mode 100644 index 000000000..4e2ec9fdb --- /dev/null +++ b/doc/source/user/misc.rst @@ -0,0 +1,9 @@ +************* +Miscellaneous +************* + +.. note:: XXX: This section is not yet written. + +.. automodule:: numpy.doc.misc + +.. automodule:: numpy.doc.methods_vs_functions diff --git a/doc/source/user/performance.rst b/doc/source/user/performance.rst new file mode 100644 index 000000000..1f6e4e16c --- /dev/null +++ b/doc/source/user/performance.rst @@ -0,0 +1,7 @@ +*********** +Performance +*********** + +.. note:: XXX: This section is not yet written. + +.. 
automodule:: numpy.doc.performance diff --git a/doc/summarize.py b/doc/summarize.py new file mode 100755 index 000000000..87f4c0725 --- /dev/null +++ b/doc/summarize.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python +""" +summarize.py + +Show a summary about which Numpy functions are documented and which are not. + +""" + +import os, glob, re, sys, inspect, optparse +sys.path.append(os.path.join(os.path.dirname(__file__), 'ext')) +from ext.phantom_import import import_phantom_module + +from ext.autosummary_generate import get_documented + +CUR_DIR = os.path.dirname(__file__) +SOURCE_DIR = os.path.join(CUR_DIR, 'source', 'reference') + +SKIP_LIST = """ +# --- aliases: +alltrue sometrue bitwise_not cumproduct +row_stack column_stack product rank + +# -- skipped: +core lib f2py dual doc emath ma rec char distutils oldnumeric numarray +testing version matlib + +add_docstring add_newdoc add_newdocs fastCopyAndTranspose pkgload +conjugate disp + +int0 object0 unicode0 uint0 string_ string0 void0 + +flagsobj + +setup setupscons PackageLoader + +lib.scimath.arccos lib.scimath.arcsin lib.scimath.arccosh lib.scimath.arcsinh +lib.scimath.arctanh lib.scimath.log lib.scimath.log2 lib.scimath.log10 +lib.scimath.logn lib.scimath.power lib.scimath.sqrt + +# --- numpy.random: +random random.info random.mtrand random.ranf random.sample random.random + +# --- numpy.fft: +fft fft.Tester fft.bench fft.fftpack fft.fftpack_lite fft.helper +fft.refft fft.refft2 fft.refftn fft.irefft fft.irefft2 fft.irefftn +fft.info fft.test + +# --- numpy.linalg: +linalg linalg.Tester +linalg.bench linalg.info linalg.lapack_lite linalg.linalg linalg.test + +# --- numpy.ctypeslib: +ctypeslib ctypeslib.test + +""".split() + +def main(): + p = optparse.OptionParser(__doc__) + options, args = p.parse_args() + + if len(args) != 0: + p.error('Wrong number of arguments') + + # prepare + fn = os.path.join(CUR_DIR, 'dump.xml') + if os.path.isfile(fn): + import_phantom_module(fn) + + # check + documented, undocumented = 
check_numpy() + + # report + in_sections = {} + for name, locations in documented.iteritems(): + for (filename, section, keyword, toctree) in locations: + in_sections.setdefault((filename, section, keyword), []).append(name) + + print "Documented" + print "==========\n" + + last_filename = None + for (filename, section, keyword), names in sorted(in_sections.items()): + if filename != last_filename: + print "--- %s\n" % filename + last_filename = filename + print " ** ", section + print format_in_columns(sorted(names)) + print "\n" + + print "" + print "Undocumented" + print "============\n" + print format_in_columns(sorted(undocumented.keys())) + +def check_numpy(): + documented = get_documented(glob.glob(SOURCE_DIR + '/*.rst')) + undocumented = {} + + import numpy, numpy.fft, numpy.linalg, numpy.random + for mod in [numpy, numpy.fft, numpy.linalg, numpy.random, + numpy.ctypeslib, numpy.emath, numpy.ma]: + undocumented.update(get_undocumented(documented, mod, skip=SKIP_LIST)) + + for d in (documented, undocumented): + for k in d.keys(): + if k.startswith('numpy.'): + d[k[6:]] = d[k] + del d[k] + + return documented, undocumented + +def get_undocumented(documented, module, module_name=None, skip=[]): + """ + Find out which items in Numpy are not documented. + + Returns + ------- + undocumented : dict of bool + Dictionary containing True for each item name that is + present in `module` but not documented. 
+ + """ + undocumented = {} + + if module_name is None: + module_name = module.__name__ + + for name in dir(module): + obj = getattr(module, name) + if name.startswith('_'): continue + + full_name = '.'.join([module_name, name]) + + if full_name in skip: continue + if full_name.startswith('numpy.') and full_name[6:] in skip: continue + if not (inspect.ismodule(obj) or callable(obj) or inspect.isclass(obj)): + continue + + if full_name not in documented: + undocumented[full_name] = True + + return undocumented + +def format_in_columns(lst): + """ + Format a list containing strings to a string containing the items + in columns. + """ + lst = map(str, lst) + col_len = max(map(len, lst)) + 2 + ncols = 80//col_len + if ncols == 0: + ncols = 1 + + if len(lst) % ncols == 0: + nrows = len(lst)//ncols + else: + nrows = 1 + len(lst)//ncols + + fmt = ' %%-%ds ' % (col_len-2) + + lines = [] + for n in range(nrows): + lines.append("".join([fmt % x for x in lst[n::nrows]])) + return "\n".join(lines) + +if __name__ == "__main__": main() |