diff options
field | value | date |
---|---|---|
author | Dieter Verfaillie <dieterv@optionexplicit.be> | 2013-08-14 07:42:17 +0200 |
committer | Dieter Verfaillie <dieterv@optionexplicit.be> | 2013-10-08 20:56:14 +0200 |
commit | 97746398f1c3ea06b3020f43fb97d4251fc7eddf (patch) | |
tree | 58449bb63f6d323a53f0fd23dbb2e80416b6b5cd | |
parent | 556bb8ee3402b92e2936ed3b594cdfc0b04a9db5 (diff) | |
download | gobject-introspection-97746398f1c3ea06b3020f43fb97d4251fc7eddf.tar.gz | |
giscanner: store indentation before the '*' of each line
-rw-r--r-- | Makefile-giscanner.am | 1 |
-rw-r--r-- | giscanner/annotationparser.py | 34 |
-rw-r--r-- | giscanner/collections/__init__.py | 1 |
-rw-r--r-- | giscanner/collections/counter.py | 305 |
-rw-r--r-- | tests/scanner/annotationparser/test_patterns.py | 19 |
5 files changed, 346 insertions, 14 deletions
diff --git a/Makefile-giscanner.am b/Makefile-giscanner.am index 47750864..095bbd6a 100644 --- a/Makefile-giscanner.am +++ b/Makefile-giscanner.am @@ -55,6 +55,7 @@ pkgpyexec_PYTHON = \ collectionsdir = $(pkgpyexecdir)/collections collections_PYTHON = \ giscanner/collections/__init__.py \ + giscanner/collections/counter.py \ giscanner/collections/ordereddict.py templatedir = $(pkglibdir) diff --git a/giscanner/annotationparser.py b/giscanner/annotationparser.py index 3dd54ef8..b76ec64f 100644 --- a/giscanner/annotationparser.py +++ b/giscanner/annotationparser.py @@ -111,7 +111,7 @@ import re from operator import ne, gt, lt -from .collections import OrderedDict +from .collections import Counter, OrderedDict from .message import Position, warn, error @@ -329,12 +329,9 @@ COMMENT_ASTERISK_RE = re.compile( ''', re.UNICODE | re.VERBOSE) -# Program matching the indentation at the beginning of every -# line (stripped from the ' * ') inside a comment block. -# -# Results in 1 symbolic group: -# - group 1 = indentation -COMMENT_INDENTATION_RE = re.compile( +# Pattern matching the indentation level of a line (used +# to get the indentation before and after the ' * '). +INDENTATION_RE = re.compile( r''' ^ (?P<indentation>\s*) # 0 or more whitespace characters @@ -973,7 +970,8 @@ class GtkDocCommentBlock(GtkDocAnnotatable): Represents a GTK-Doc comment block. ''' - __slots__ = ('code_before', 'code_after', 'name', 'params', 'description', 'tags') + __slots__ = ('code_before', 'code_after', 'indentation', + 'name', 'params', 'description', 'tags') #: Valid annotation names for the GTK-Doc comment block identifier part. valid_annotations = (ANN_ATTRIBUTES, ANN_CONSTRUCTOR, ANN_FOREIGN, ANN_GET_VALUE_FUNC, @@ -989,6 +987,10 @@ class GtkDocCommentBlock(GtkDocAnnotatable): #: Code following the GTK-Doc comment block end token ("``*/``"), if any. self.code_after = None + #: List of indentation levels (preceding the "``*``") for all lines in the comment + #: block's source text. 
+ self.indentation = [] + #: Identifier name. self.name = name @@ -1220,8 +1222,9 @@ class GtkDocCommentBlockParser(object): # that looks like a GTK-Doc comment block. comment_block = None identifier_warned = False - part_indent = None + block_indent = [] line_indent = None + part_indent = None in_part = None current_part = None returns_seen = False @@ -1234,14 +1237,18 @@ class GtkDocCommentBlockParser(object): original_line = line column_offset = 0 + # Store indentation level of the comment (before the ' * ') + result = INDENTATION_RE.match(line) + block_indent.append(result.group('indentation')) + # Get rid of the ' * ' at the start of the line. result = COMMENT_ASTERISK_RE.match(line) if result: column_offset = result.end(0) line = line[result.end(0):] - # Store indentation level of the line. - result = COMMENT_INDENTATION_RE.match(line) + # Store indentation level of the line (after the ' * '). + result = INDENTATION_RE.match(line) line_indent = len(result.group('indentation').replace('\t', ' ')) #################################################################### @@ -1326,6 +1333,7 @@ class GtkDocCommentBlockParser(object): #################################################################### result = PARAMETER_RE.match(line) if result: + part_indent = line_indent param_name = result.group('parameter_name') param_name_lower = param_name.lower() param_fields = result.group('fields') @@ -1335,8 +1343,6 @@ class GtkDocCommentBlockParser(object): if in_part == PART_IDENTIFIER: in_part = PART_PARAMETERS - part_indent = line_indent - if in_part != PART_PARAMETERS: column = result.start('parameter_name') + column_offset warn("'@%s' parameter unexpected at this location:\n%s\n%s" % @@ -1597,7 +1603,7 @@ class GtkDocCommentBlockParser(object): for param in comment_block.params.values(): self._clean_comment_block_part(param) - # Validate and store block. 
+ comment_block.indentation = block_indent comment_block.validate() return comment_block else: diff --git a/giscanner/collections/__init__.py b/giscanner/collections/__init__.py index 29987a10..aa3814a7 100644 --- a/giscanner/collections/__init__.py +++ b/giscanner/collections/__init__.py @@ -19,4 +19,5 @@ # +from .counter import Counter from .ordereddict import OrderedDict diff --git a/giscanner/collections/counter.py b/giscanner/collections/counter.py new file mode 100644 index 00000000..b337ab3f --- /dev/null +++ b/giscanner/collections/counter.py @@ -0,0 +1,305 @@ +# -*- Mode: Python -*- +# GObject-Introspection - a framework for introspecting GObject libraries +# Copyright (C) 2013 Dieter Verfaillie <dieterv@optionexplicit.be> +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301, USA. +# + + +from __future__ import absolute_import + + +try: + from collections import Counter +except ImportError: + # collections.Counter for Python 2.6, backported from + # http://hg.python.org/cpython/file/d047928ae3f6/Lib/collections/__init__.py#l402 + + from operator import itemgetter + from heapq import nlargest + from itertools import repeat, ifilter + + class Counter(dict): + '''Dict subclass for counting hashable items. Sometimes called a bag + or multiset. 
Elements are stored as dictionary keys and their counts + are stored as dictionary values. + + >>> c = Counter('abcdeabcdabcaba') # count elements from a string + + >>> c.most_common(3) # three most common elements + [('a', 5), ('b', 4), ('c', 3)] + >>> sorted(c) # list all unique elements + ['a', 'b', 'c', 'd', 'e'] + >>> ''.join(sorted(c.elements())) # list elements with repetitions + 'aaaaabbbbcccdde' + >>> sum(c.values()) # total of all counts + 15 + + >>> c['a'] # count of letter 'a' + 5 + >>> for elem in 'shazam': # update counts from an iterable + ... c[elem] += 1 # by adding 1 to each element's count + >>> c['a'] # now there are seven 'a' + 7 + >>> del c['b'] # remove all 'b' + >>> c['b'] # now there are zero 'b' + 0 + + >>> d = Counter('simsalabim') # make another counter + >>> c.update(d) # add in the second counter + >>> c['a'] # now there are nine 'a' + 9 + + >>> c.clear() # empty the counter + >>> c + Counter() + + Note: If a count is set to zero or reduced to zero, it will remain + in the counter until the entry is deleted or the counter is cleared: + + >>> c = Counter('aaabbc') + >>> c['b'] -= 2 # reduce the count of 'b' by two + >>> c.most_common() # 'b' is still in, but its count is zero + [('a', 3), ('c', 1), ('b', 0)] + + ''' + # References: + # http://en.wikipedia.org/wiki/Multiset + # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html + # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm + # http://code.activestate.com/recipes/259174/ + # Knuth, TAOCP Vol. II section 4.6.3 + + def __init__(self, iterable=None, **kwds): + '''Create a new, empty Counter object. And if given, count elements + from an input iterable. Or, initialize the count from another mapping + of elements to their counts. 
+ + >>> c = Counter() # a new, empty counter + >>> c = Counter('gallahad') # a new counter from an iterable + >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping + >>> c = Counter(a=4, b=2) # a new counter from keyword args + + ''' + self.update(iterable, **kwds) + + def __missing__(self, key): + 'The count of elements not in the Counter is zero.' + # Needed so that self[missing_item] does not raise KeyError + return 0 + + def most_common(self, n=None): + '''List the n most common elements and their counts from the most + common to the least. If n is None, then list all element counts. + + >>> Counter('abcdeabcdabcaba').most_common(3) + [('a', 5), ('b', 4), ('c', 3)] + + ''' + # Emulate Bag.sortedByCount from Smalltalk + if n is None: + return sorted(self.iteritems(), key=itemgetter(1), reverse=True) + return nlargest(n, self.iteritems(), key=itemgetter(1)) + + def elements(self): + '''Iterator over elements repeating each as many times as its count. + + >>> c = Counter('ABCABC') + >>> sorted(c.elements()) + ['A', 'A', 'B', 'B', 'C', 'C'] + + # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1 + >>> prime_factors = Counter({2: 2, 3: 3, 17: 1}) + >>> product = 1 + >>> for factor in prime_factors.elements(): # loop over factors + ... product *= factor # and multiply them + >>> product + 1836 + + Note, if an element's count has been set to zero or is a negative + number, elements() will ignore it. + + ''' + # Emulate Bag.do from Smalltalk and Multiset.begin from C++. + for elem, count in self.iteritems(): + for _ in repeat(None, count): + yield elem + + # Override dict methods where necessary + + @classmethod + def fromkeys(cls, iterable, v=None): + # There is no equivalent method for counters because setting v=1 + # means that no element can have a count greater than one. + raise NotImplementedError( + 'Counter.fromkeys() is undefined. 
Use Counter(iterable) instead.') + + def update(self, iterable=None, **kwds): + '''Like dict.update() but add counts instead of replacing them. + + Source can be an iterable, a dictionary, or another Counter instance. + + >>> c = Counter('which') + >>> c.update('witch') # add elements from another iterable + >>> d = Counter('watch') + >>> c.update(d) # add elements from another counter + >>> c['h'] # four 'h' in which, witch, and watch + 4 + + ''' + # The regular dict.update() operation makes no sense here because the + # replace behavior results in the some of original untouched counts + # being mixed-in with all of the other counts for a mismash that + # doesn't have a straight-forward interpretation in most counting + # contexts. Instead, we implement straight-addition. Both the inputs + # and outputs are allowed to contain zero and negative counts. + + if iterable is not None: + if hasattr(iterable, 'iteritems'): + if self: + self_get = self.get + for elem, count in iterable.iteritems(): + self[elem] = self_get(elem, 0) + count + else: + dict.update(self, iterable) # fast path when counter is empty + else: + self_get = self.get + for elem in iterable: + self[elem] = self_get(elem, 0) + 1 + if kwds: + self.update(kwds) + + def subtract(self, iterable=None, **kwds): + '''Like dict.update() but subtracts counts instead of replacing them. + Counts can be reduced below zero. Both the inputs and outputs are + allowed to contain zero and negative counts. + + Source can be an iterable, a dictionary, or another Counter instance. 
+ + >>> c = Counter('which') + >>> c.subtract('witch') # subtract elements from another iterable + >>> c.subtract(Counter('watch')) # subtract elements from another counter + >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch + 0 + >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch + -1 + + ''' + if hasattr(iterable, 'iteritems'): + for elem, count in iterable.iteritems(): + self[elem] -= count + else: + for elem in iterable: + self[elem] -= 1 + + def copy(self): + 'Return a shallow copy.' + return self.__class__(self) + + def __reduce__(self): + return self.__class__, (dict(self), ) + + def __delitem__(self, elem): + 'Like dict.__delitem__() but does not raise KeyError for missing values.' + if elem in self: + dict.__delitem__(self, elem) + + def __repr__(self): + if not self: + return '%s()' % self.__class__.__name__ + items = ', '.join(map('%r: %r'.__mod__, self.most_common())) + return '%s({%s})' % (self.__class__.__name__, items) + + # Multiset-style mathematical operations discussed in: + # Knuth TAOCP Volume II section 4.6.3 exercise 19 + # and at http://en.wikipedia.org/wiki/Multiset + # + # Outputs guaranteed to only include positive counts. + # + # To strip negative and zero counts, add-in an empty counter: + # c += Counter() + + def __add__(self, other): + '''Add counts from two counters. + + >>> Counter('abbb') + Counter('bcc') + Counter({'b': 4, 'c': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem in set(self) | set(other): + newcount = self[elem] + other[elem] + if newcount > 0: + result[elem] = newcount + return result + + def __sub__(self, other): + ''' Subtract count, but keep only results with positive counts. 
+ + >>> Counter('abbbc') - Counter('bccd') + Counter({'b': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem in set(self) | set(other): + newcount = self[elem] - other[elem] + if newcount > 0: + result[elem] = newcount + return result + + def __or__(self, other): + '''Union is the maximum of value in either of the input counters. + + >>> Counter('abbb') | Counter('bcc') + Counter({'b': 3, 'c': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + _max = max + result = Counter() + for elem in set(self) | set(other): + newcount = _max(self[elem], other[elem]) + if newcount > 0: + result[elem] = newcount + return result + + def __and__(self, other): + ''' Intersection is the minimum of corresponding counts. + + >>> Counter('abbb') & Counter('bcc') + Counter({'b': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + _min = min + result = Counter() + if len(self) < len(other): + self, other = other, self + for elem in ifilter(self.__contains__, other): + newcount = _min(self[elem], other[elem]) + if newcount > 0: + result[elem] = newcount + return result + + if __name__ == '__main__': + import doctest + print(doctest.testmod()) diff --git a/tests/scanner/annotationparser/test_patterns.py b/tests/scanner/annotationparser/test_patterns.py index ce82cf90..ead487d7 100644 --- a/tests/scanner/annotationparser/test_patterns.py +++ b/tests/scanner/annotationparser/test_patterns.py @@ -32,6 +32,7 @@ against the expected output. 
from giscanner.annotationparser import (COMMENT_BLOCK_START_RE, COMMENT_BLOCK_END_RE, + COMMENT_ASTERISK_RE, SECTION_RE, SYMBOL_RE, PROPERTY_RE, SIGNAL_RE, PARAMETER_RE, TAG_RE, TAG_VALUE_VERSION_RE, TAG_VALUE_STABILITY_RE) @@ -157,6 +158,23 @@ comment_end_tests = [ 'code': ''})] +comment_asterisk_tests = [ + (COMMENT_ASTERISK_RE, '*', + {}), + (COMMENT_ASTERISK_RE, '* ', + {}), + (COMMENT_ASTERISK_RE, ' *', + {}), + (COMMENT_ASTERISK_RE, ' * ', + {}), + (COMMENT_ASTERISK_RE, ' * ', + {}), + (COMMENT_ASTERISK_RE, ' * test', + {}), + (COMMENT_ASTERISK_RE, 'test * ', + None)] + + identifier_section_tests = [ (SECTION_RE, 'TSIEOCN', None), @@ -733,6 +751,7 @@ class TestProgram(TestCase): if __name__ == '__main__': create_tests('test_comment_start', comment_start_tests) create_tests('test_comment_end', comment_end_tests) + create_tests('test_comment_asterisk', comment_asterisk_tests) create_tests('test_identifier_section', identifier_section_tests) create_tests('test_identifier_symbol', identifier_symbol_tests) create_tests('test_identifier_property', identifier_property_tests) |