summary refs log tree commit diff
diff options
context:
space:
mode:
author Dieter Verfaillie <dieterv@optionexplicit.be> 2013-08-14 07:42:17 +0200
committer Dieter Verfaillie <dieterv@optionexplicit.be> 2013-10-08 20:56:14 +0200
commit 97746398f1c3ea06b3020f43fb97d4251fc7eddf (patch)
tree 58449bb63f6d323a53f0fd23dbb2e80416b6b5cd
parent 556bb8ee3402b92e2936ed3b594cdfc0b04a9db5 (diff)
download gobject-introspection-97746398f1c3ea06b3020f43fb97d4251fc7eddf.tar.gz
giscanner: store indentation before the '*' of each line
-rw-r--r-- Makefile-giscanner.am 1
-rw-r--r-- giscanner/annotationparser.py 34
-rw-r--r-- giscanner/collections/__init__.py 1
-rw-r--r-- giscanner/collections/counter.py 305
-rw-r--r-- tests/scanner/annotationparser/test_patterns.py 19
5 files changed, 346 insertions, 14 deletions
diff --git a/Makefile-giscanner.am b/Makefile-giscanner.am
index 47750864..095bbd6a 100644
--- a/Makefile-giscanner.am
+++ b/Makefile-giscanner.am
@@ -55,6 +55,7 @@ pkgpyexec_PYTHON = \
collectionsdir = $(pkgpyexecdir)/collections
collections_PYTHON = \
giscanner/collections/__init__.py \
+ giscanner/collections/counter.py \
giscanner/collections/ordereddict.py
templatedir = $(pkglibdir)
diff --git a/giscanner/annotationparser.py b/giscanner/annotationparser.py
index 3dd54ef8..b76ec64f 100644
--- a/giscanner/annotationparser.py
+++ b/giscanner/annotationparser.py
@@ -111,7 +111,7 @@ import re
from operator import ne, gt, lt
-from .collections import OrderedDict
+from .collections import Counter, OrderedDict
from .message import Position, warn, error
@@ -329,12 +329,9 @@ COMMENT_ASTERISK_RE = re.compile(
''',
re.UNICODE | re.VERBOSE)
-# Program matching the indentation at the beginning of every
-# line (stripped from the ' * ') inside a comment block.
-#
-# Results in 1 symbolic group:
-# - group 1 = indentation
-COMMENT_INDENTATION_RE = re.compile(
+# Pattern matching the indentation level of a line (used
+# to get the indentation before and after the ' * ').
+INDENTATION_RE = re.compile(
r'''
^
(?P<indentation>\s*) # 0 or more whitespace characters
@@ -973,7 +970,8 @@ class GtkDocCommentBlock(GtkDocAnnotatable):
Represents a GTK-Doc comment block.
'''
- __slots__ = ('code_before', 'code_after', 'name', 'params', 'description', 'tags')
+ __slots__ = ('code_before', 'code_after', 'indentation',
+ 'name', 'params', 'description', 'tags')
#: Valid annotation names for the GTK-Doc comment block identifier part.
valid_annotations = (ANN_ATTRIBUTES, ANN_CONSTRUCTOR, ANN_FOREIGN, ANN_GET_VALUE_FUNC,
@@ -989,6 +987,10 @@ class GtkDocCommentBlock(GtkDocAnnotatable):
#: Code following the GTK-Doc comment block end token ("``*/``"), if any.
self.code_after = None
+ #: List of indentation levels (preceding the "``*``") for all lines in the comment
+ #: block's source text.
+ self.indentation = []
+
#: Identifier name.
self.name = name
@@ -1220,8 +1222,9 @@ class GtkDocCommentBlockParser(object):
# that looks like a GTK-Doc comment block.
comment_block = None
identifier_warned = False
- part_indent = None
+ block_indent = []
line_indent = None
+ part_indent = None
in_part = None
current_part = None
returns_seen = False
@@ -1234,14 +1237,18 @@ class GtkDocCommentBlockParser(object):
original_line = line
column_offset = 0
+ # Store indentation level of the comment (before the ' * ')
+ result = INDENTATION_RE.match(line)
+ block_indent.append(result.group('indentation'))
+
# Get rid of the ' * ' at the start of the line.
result = COMMENT_ASTERISK_RE.match(line)
if result:
column_offset = result.end(0)
line = line[result.end(0):]
- # Store indentation level of the line.
- result = COMMENT_INDENTATION_RE.match(line)
+ # Store indentation level of the line (after the ' * ').
+ result = INDENTATION_RE.match(line)
line_indent = len(result.group('indentation').replace('\t', ' '))
####################################################################
@@ -1326,6 +1333,7 @@ class GtkDocCommentBlockParser(object):
####################################################################
result = PARAMETER_RE.match(line)
if result:
+ part_indent = line_indent
param_name = result.group('parameter_name')
param_name_lower = param_name.lower()
param_fields = result.group('fields')
@@ -1335,8 +1343,6 @@ class GtkDocCommentBlockParser(object):
if in_part == PART_IDENTIFIER:
in_part = PART_PARAMETERS
- part_indent = line_indent
-
if in_part != PART_PARAMETERS:
column = result.start('parameter_name') + column_offset
warn("'@%s' parameter unexpected at this location:\n%s\n%s" %
@@ -1597,7 +1603,7 @@ class GtkDocCommentBlockParser(object):
for param in comment_block.params.values():
self._clean_comment_block_part(param)
- # Validate and store block.
+ comment_block.indentation = block_indent
comment_block.validate()
return comment_block
else:
diff --git a/giscanner/collections/__init__.py b/giscanner/collections/__init__.py
index 29987a10..aa3814a7 100644
--- a/giscanner/collections/__init__.py
+++ b/giscanner/collections/__init__.py
@@ -19,4 +19,5 @@
#
+from .counter import Counter
from .ordereddict import OrderedDict
diff --git a/giscanner/collections/counter.py b/giscanner/collections/counter.py
new file mode 100644
index 00000000..b337ab3f
--- /dev/null
+++ b/giscanner/collections/counter.py
@@ -0,0 +1,305 @@
+# -*- Mode: Python -*-
+# GObject-Introspection - a framework for introspecting GObject libraries
+# Copyright (C) 2013 Dieter Verfaillie <dieterv@optionexplicit.be>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301, USA.
+#
+
+
+from __future__ import absolute_import
+
+
+try:
+ from collections import Counter
+except ImportError:
+ # collections.Counter for Python 2.6, backported from
+ # http://hg.python.org/cpython/file/d047928ae3f6/Lib/collections/__init__.py#l402
+
+ from operator import itemgetter
+ from heapq import nlargest
+ from itertools import repeat, ifilter
+
+ class Counter(dict):
+ '''Dict subclass for counting hashable items. Sometimes called a bag
+ or multiset. Elements are stored as dictionary keys and their counts
+ are stored as dictionary values.
+
+ >>> c = Counter('abcdeabcdabcaba') # count elements from a string
+
+ >>> c.most_common(3) # three most common elements
+ [('a', 5), ('b', 4), ('c', 3)]
+ >>> sorted(c) # list all unique elements
+ ['a', 'b', 'c', 'd', 'e']
+ >>> ''.join(sorted(c.elements())) # list elements with repetitions
+ 'aaaaabbbbcccdde'
+ >>> sum(c.values()) # total of all counts
+ 15
+
+ >>> c['a'] # count of letter 'a'
+ 5
+ >>> for elem in 'shazam': # update counts from an iterable
+ ... c[elem] += 1 # by adding 1 to each element's count
+ >>> c['a'] # now there are seven 'a'
+ 7
+ >>> del c['b'] # remove all 'b'
+ >>> c['b'] # now there are zero 'b'
+ 0
+
+ >>> d = Counter('simsalabim') # make another counter
+ >>> c.update(d) # add in the second counter
+ >>> c['a'] # now there are nine 'a'
+ 9
+
+ >>> c.clear() # empty the counter
+ >>> c
+ Counter()
+
+ Note: If a count is set to zero or reduced to zero, it will remain
+ in the counter until the entry is deleted or the counter is cleared:
+
+ >>> c = Counter('aaabbc')
+ >>> c['b'] -= 2 # reduce the count of 'b' by two
+ >>> c.most_common() # 'b' is still in, but its count is zero
+ [('a', 3), ('c', 1), ('b', 0)]
+
+ '''
+ # References:
+ # http://en.wikipedia.org/wiki/Multiset
+ # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
+ # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
+ # http://code.activestate.com/recipes/259174/
+ # Knuth, TAOCP Vol. II section 4.6.3
+
+ def __init__(self, iterable=None, **kwds):
+ '''Create a new, empty Counter object. And if given, count elements
+ from an input iterable. Or, initialize the count from another mapping
+ of elements to their counts.
+
+ >>> c = Counter() # a new, empty counter
+ >>> c = Counter('gallahad') # a new counter from an iterable
+ >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
+ >>> c = Counter(a=4, b=2) # a new counter from keyword args
+
+ '''
+ self.update(iterable, **kwds)
+
+ def __missing__(self, key):
+ 'The count of elements not in the Counter is zero.'
+ # Needed so that self[missing_item] does not raise KeyError
+ return 0
+
+ def most_common(self, n=None):
+ '''List the n most common elements and their counts from the most
+ common to the least. If n is None, then list all element counts.
+
+ >>> Counter('abcdeabcdabcaba').most_common(3)
+ [('a', 5), ('b', 4), ('c', 3)]
+
+ '''
+ # Emulate Bag.sortedByCount from Smalltalk
+ if n is None:
+ return sorted(self.iteritems(), key=itemgetter(1), reverse=True)
+ return nlargest(n, self.iteritems(), key=itemgetter(1))
+
+ def elements(self):
+ '''Iterator over elements repeating each as many times as its count.
+
+ >>> c = Counter('ABCABC')
+ >>> sorted(c.elements())
+ ['A', 'A', 'B', 'B', 'C', 'C']
+
+ # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
+ >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
+ >>> product = 1
+ >>> for factor in prime_factors.elements(): # loop over factors
+ ... product *= factor # and multiply them
+ >>> product
+ 1836
+
+ Note, if an element's count has been set to zero or is a negative
+ number, elements() will ignore it.
+
+ '''
+ # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
+ for elem, count in self.iteritems():
+ for _ in repeat(None, count):
+ yield elem
+
+ # Override dict methods where necessary
+
+ @classmethod
+ def fromkeys(cls, iterable, v=None):
+ # There is no equivalent method for counters because setting v=1
+ # means that no element can have a count greater than one.
+ raise NotImplementedError(
+ 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
+
+ def update(self, iterable=None, **kwds):
+ '''Like dict.update() but add counts instead of replacing them.
+
+ Source can be an iterable, a dictionary, or another Counter instance.
+
+ >>> c = Counter('which')
+ >>> c.update('witch') # add elements from another iterable
+ >>> d = Counter('watch')
+ >>> c.update(d) # add elements from another counter
+ >>> c['h'] # four 'h' in which, witch, and watch
+ 4
+
+ '''
+ # The regular dict.update() operation makes no sense here because the
+ # replace behavior results in some of the original untouched counts
+ # being mixed-in with all of the other counts for a mishmash that
+ # doesn't have a straight-forward interpretation in most counting
+ # contexts. Instead, we implement straight-addition. Both the inputs
+ # and outputs are allowed to contain zero and negative counts.
+
+ if iterable is not None:
+ if hasattr(iterable, 'iteritems'):
+ if self:
+ self_get = self.get
+ for elem, count in iterable.iteritems():
+ self[elem] = self_get(elem, 0) + count
+ else:
+ dict.update(self, iterable) # fast path when counter is empty
+ else:
+ self_get = self.get
+ for elem in iterable:
+ self[elem] = self_get(elem, 0) + 1
+ if kwds:
+ self.update(kwds)
+
+ def subtract(self, iterable=None, **kwds):
+ '''Like dict.update() but subtracts counts instead of replacing them.
+ Counts can be reduced below zero. Both the inputs and outputs are
+ allowed to contain zero and negative counts.
+
+ Source can be an iterable, a dictionary, or another Counter instance.
+
+ >>> c = Counter('which')
+ >>> c.subtract('witch') # subtract elements from another iterable
+ >>> c.subtract(Counter('watch')) # subtract elements from another counter
+ >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
+ 0
+ >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
+ -1
+
+ '''
+ if hasattr(iterable, 'iteritems'):
+ for elem, count in iterable.iteritems():
+ self[elem] -= count
+ else:
+ for elem in iterable:
+ self[elem] -= 1
+
+ def copy(self):
+ 'Return a shallow copy.'
+ return self.__class__(self)
+
+ def __reduce__(self):
+ return self.__class__, (dict(self), )
+
+ def __delitem__(self, elem):
+ 'Like dict.__delitem__() but does not raise KeyError for missing values.'
+ if elem in self:
+ dict.__delitem__(self, elem)
+
+ def __repr__(self):
+ if not self:
+ return '%s()' % self.__class__.__name__
+ items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
+ return '%s({%s})' % (self.__class__.__name__, items)
+
+ # Multiset-style mathematical operations discussed in:
+ # Knuth TAOCP Volume II section 4.6.3 exercise 19
+ # and at http://en.wikipedia.org/wiki/Multiset
+ #
+ # Outputs guaranteed to only include positive counts.
+ #
+ # To strip negative and zero counts, add-in an empty counter:
+ # c += Counter()
+
+ def __add__(self, other):
+ '''Add counts from two counters.
+
+ >>> Counter('abbb') + Counter('bcc')
+ Counter({'b': 4, 'c': 2, 'a': 1})
+
+ '''
+ if not isinstance(other, Counter):
+ return NotImplemented
+ result = Counter()
+ for elem in set(self) | set(other):
+ newcount = self[elem] + other[elem]
+ if newcount > 0:
+ result[elem] = newcount
+ return result
+
+ def __sub__(self, other):
+ ''' Subtract count, but keep only results with positive counts.
+
+ >>> Counter('abbbc') - Counter('bccd')
+ Counter({'b': 2, 'a': 1})
+
+ '''
+ if not isinstance(other, Counter):
+ return NotImplemented
+ result = Counter()
+ for elem in set(self) | set(other):
+ newcount = self[elem] - other[elem]
+ if newcount > 0:
+ result[elem] = newcount
+ return result
+
+ def __or__(self, other):
+ '''Union is the maximum of value in either of the input counters.
+
+ >>> Counter('abbb') | Counter('bcc')
+ Counter({'b': 3, 'c': 2, 'a': 1})
+
+ '''
+ if not isinstance(other, Counter):
+ return NotImplemented
+ _max = max
+ result = Counter()
+ for elem in set(self) | set(other):
+ newcount = _max(self[elem], other[elem])
+ if newcount > 0:
+ result[elem] = newcount
+ return result
+
+ def __and__(self, other):
+ ''' Intersection is the minimum of corresponding counts.
+
+ >>> Counter('abbb') & Counter('bcc')
+ Counter({'b': 1})
+
+ '''
+ if not isinstance(other, Counter):
+ return NotImplemented
+ _min = min
+ result = Counter()
+ if len(self) < len(other):
+ self, other = other, self
+ for elem in ifilter(self.__contains__, other):
+ newcount = _min(self[elem], other[elem])
+ if newcount > 0:
+ result[elem] = newcount
+ return result
+
+ if __name__ == '__main__':
+ import doctest
+ print(doctest.testmod())
diff --git a/tests/scanner/annotationparser/test_patterns.py b/tests/scanner/annotationparser/test_patterns.py
index ce82cf90..ead487d7 100644
--- a/tests/scanner/annotationparser/test_patterns.py
+++ b/tests/scanner/annotationparser/test_patterns.py
@@ -32,6 +32,7 @@ against the expected output.
from giscanner.annotationparser import (COMMENT_BLOCK_START_RE, COMMENT_BLOCK_END_RE,
+ COMMENT_ASTERISK_RE,
SECTION_RE, SYMBOL_RE, PROPERTY_RE,
SIGNAL_RE, PARAMETER_RE, TAG_RE,
TAG_VALUE_VERSION_RE, TAG_VALUE_STABILITY_RE)
@@ -157,6 +158,23 @@ comment_end_tests = [
'code': ''})]
+comment_asterisk_tests = [
+ (COMMENT_ASTERISK_RE, '*',
+ {}),
+ (COMMENT_ASTERISK_RE, '* ',
+ {}),
+ (COMMENT_ASTERISK_RE, ' *',
+ {}),
+ (COMMENT_ASTERISK_RE, ' * ',
+ {}),
+ (COMMENT_ASTERISK_RE, ' * ',
+ {}),
+ (COMMENT_ASTERISK_RE, ' * test',
+ {}),
+ (COMMENT_ASTERISK_RE, 'test * ',
+ None)]
+
+
identifier_section_tests = [
(SECTION_RE, 'TSIEOCN',
None),
@@ -733,6 +751,7 @@ class TestProgram(TestCase):
if __name__ == '__main__':
create_tests('test_comment_start', comment_start_tests)
create_tests('test_comment_end', comment_end_tests)
+ create_tests('test_comment_asterisk', comment_asterisk_tests)
create_tests('test_identifier_section', identifier_section_tests)
create_tests('test_identifier_symbol', identifier_symbol_tests)
create_tests('test_identifier_property', identifier_property_tests)