diff options
Diffstat (limited to 'bzrlib/tests/test_globbing.py')
-rw-r--r-- | bzrlib/tests/test_globbing.py | 386 |
1 files changed, 386 insertions, 0 deletions
# Copyright (C) 2006-2011 Canonical Ltd
# -*- coding: utf-8 -*-
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for the glob matchers and pattern normalizer in bzrlib.globbing."""

import re

from bzrlib import errors
from bzrlib.globbing import (
    Globster,
    ExceptionGlobster,
    _OrderedGlobster,
    normalize_pattern
    )
from bzrlib.tests import (
    TestCase,
    )


class TestGlobster(TestCase):
    """Tests for Globster.match() against individual and combined globs."""

    def assertMatch(self, matchset, glob_prefix=None):
        """Assert each glob accepts its positive names and rejects the rest.

        A fresh single-pattern Globster is built for every glob.  On failure
        the message shows the name, the glob and the regex pattern the
        Globster holds for it.

        :param matchset: Iterable of (glob, positive, negative) triples,
            where positive is a list of names that must match the glob and
            negative is a list of names that must not.
        :param glob_prefix: Optional string prepended to every glob before
            the Globster is built.
        """
        for glob, positive, negative in matchset:
            if glob_prefix:
                glob = glob_prefix + glob
            globster = Globster([glob])
            # The regex is looked up only after match() has been called, as
            # part of building the failure message; keep that ordering.
            for name in positive:
                self.assertTrue(globster.match(name), repr(
                    u'name "%s" does not match glob "%s" (re=%s)' %
                    (name, glob, globster._regex_patterns[0][0].pattern)))
            for name in negative:
                self.assertFalse(globster.match(name), repr(
                    u'name "%s" does match glob "%s" (re=%s)' %
                    (name, glob, globster._regex_patterns[0][0].pattern)))

    def assertMatchBasenameAndFullpath(self, matchset):
        """Run assertMatch on matchset as given and again with './' prefixed.

        :param matchset: Iterable of (glob, positive, negative) triples, as
            accepted by assertMatch.
        """
        # test basename matcher
        self.assertMatch(matchset)
        # test fullpath matcher
        self.assertMatch(matchset, glob_prefix='./')

    def test_char_group_digit(self):
        """Check [[:digit:]] and its negation."""
        self.assertMatchBasenameAndFullpath([
            # The definition of digit this uses includes arabic digits from
            # non-latin scripts (arabic, indic, etc.) but neither roman
            # numerals nor vulgar fractions. Some characters such as
            # subscript/superscript digits may or may not match depending on
            # the Python version used, see: <http://bugs.python.org/issue6561>
            (u'[[:digit:]]',
             [u'0', u'5', u'\u0663', u'\u06f9', u'\u0f21'],
             [u'T', u'q', u' ', u'\u8336', u'.']),
            (u'[^[:digit:]]',
             [u'T', u'q', u' ', u'\u8336', u'.'],
             [u'0', u'5', u'\u0663', u'\u06f9', u'\u0f21']),
            ])

    def test_char_group_space(self):
        """Check [[:space:]] and its negation."""
        self.assertMatchBasenameAndFullpath([
            (u'[[:space:]]',
             [u' ', u'\t', u'\n', u'\xa0', u'\u2000', u'\u2002'],
             [u'a', u'-', u'\u8336', u'.']),
            (u'[^[:space:]]',
             [u'a', u'-', u'\u8336', u'.'],
             [u' ', u'\t', u'\n', u'\xa0', u'\u2000', u'\u2002']),
            ])

    def test_char_group_alnum(self):
        """Check [[:alnum:]] and its negation."""
        self.assertMatchBasenameAndFullpath([
            (u'[[:alnum:]]',
             [u'a', u'Z', u'\u017e', u'\u8336'],
             [u':', u'-', u'\u25cf', u'.']),
            (u'[^[:alnum:]]',
             [u':', u'-', u'\u25cf', u'.'],
             [u'a']),
            ])

    def test_char_group_ascii(self):
        """Check [[:ascii:]] and its negation."""
        self.assertMatchBasenameAndFullpath([
            (u'[[:ascii:]]',
             [u'a', u'Q', u'^', u'.'],
             [u'\xcc', u'\u8336']),
            (u'[^[:ascii:]]',
             [u'\xcc', u'\u8336'],
             [u'a', u'Q', u'^', u'.']),
            ])

    def test_char_group_blank(self):
        """Check [[:blank:]] and its negation."""
        self.assertMatchBasenameAndFullpath([
            (u'[[:blank:]]',
             [u'\t'],
             [u'x', u'y', u'z', u'.']),
            (u'[^[:blank:]]',
             [u'x', u'y', u'z', u'.'],
             [u'\t']),
            ])

    def test_char_group_cntrl(self):
        """Check [[:cntrl:]] and its negation."""
        self.assertMatchBasenameAndFullpath([
            (u'[[:cntrl:]]',
             [u'\b', u'\t', u'\x7f'],
             [u'a', u'Q', u'\u8336', u'.']),
            (u'[^[:cntrl:]]',
             [u'a', u'Q', u'\u8336', u'.'],
             [u'\b', u'\t', u'\x7f']),
            ])

    def test_char_group_range(self):
        """Check character ranges, including '^' and '!' negation."""
        self.assertMatchBasenameAndFullpath([
            (u'[a-z]',
             [u'a', u'q', u'f'],
             [u'A', u'Q', u'F']),
            (u'[^a-z]',
             [u'A', u'Q', u'F'],
             [u'a', u'q', u'f']),
            (u'[!a-z]foo',
             [u'Afoo', u'.foo'],
             [u'afoo', u'ABfoo']),
            (u'foo[!a-z]bar',
             [u'fooAbar', u'foo.bar'],
             [u'foojbar']),
            (u'[\x20-\x30\u8336]',
             [u'\040', u'\044', u'\u8336'],
             [u'\x1f']),
            (u'[^\x20-\x30\u8336]',
             [u'\x1f'],
             [u'\040', u'\044', u'\u8336']),
            ])

    def test_regex(self):
        """Check patterns given as regular expressions with the RE: prefix."""
        self.assertMatch([
            (u'RE:(a|b|c+)',
             [u'a', u'b', u'ccc'],
             [u'd', u'aa', u'c+', u'-a']),
            (u'RE:(?:a|b|c+)',
             [u'a', u'b', u'ccc'],
             [u'd', u'aa', u'c+', u'-a']),
            (u'RE:(?P<a>.)(?P=a)',
             [u'a'],
             [u'ab', u'aa', u'aaa']),
            # test we can handle odd numbers of trailing backslashes
            (u'RE:a\\\\\\',
             [u'a\\'],
             [u'a', u'ab', u'aa', u'aaa']),
            ])

    def test_question_mark(self):
        """Check that '?' stands for one character and never for '/'."""
        self.assertMatch([
            (u'?foo',
             [u'xfoo', u'bar/xfoo', u'bar/\u8336foo', u'.foo', u'bar/.foo'],
             [u'bar/foo', u'foo']),
            (u'foo?bar',
             [u'fooxbar', u'foo.bar', u'foo\u8336bar', u'qyzzy/foo.bar'],
             [u'foo/bar']),
            (u'foo/?bar',
             [u'foo/xbar', u'foo/\u8336bar', u'foo/.bar'],
             [u'foo/bar', u'bar/foo/xbar']),
            ])

    def test_asterisk(self):
        """Check single '*' in basename and full path patterns."""
        self.assertMatch([
            (u'x*x',
             [u'xx', u'x.x', u'x\u8336..x', u'\u8336/x.x', u'x.y.x'],
             [u'x/x', u'bar/x/bar/x', u'bax/abaxab']),
            (u'foo/*x',
             [u'foo/x', u'foo/bax', u'foo/a.x', u'foo/.x', u'foo/.q.x'],
             [u'foo/bar/bax']),
            (u'*/*x',
             [u'\u8336/x', u'foo/x', u'foo/bax', u'x/a.x', u'.foo/x',
              u'\u8336/.x', u'foo/.q.x'],
             [u'foo/bar/bax']),
            (u'f*',
             [u'foo', u'foo.bar'],
             [u'.foo', u'foo/bar', u'foo/.bar']),
            # u'foo\\nbar' is a literal backslash followed by 'n', not a
            # newline.
            (u'*bar',
             [u'bar', u'foobar', u'foo\\nbar', u'foo.bar', u'foo/bar',
              u'foo/foobar', u'foo/f.bar', u'.bar', u'foo/.bar'],
             []),
            ])

    def test_double_asterisk(self):
        """Check '**' as a whole path component and inside a component."""
        self.assertMatch([
            # expected uses of double asterisk
            (u'foo/**/x',
             [u'foo/x', u'foo/bar/x'],
             [u'foox', u'foo/bax', u'foo/.x', u'foo/bar/bax']),
            (u'**/bar',
             [u'bar', u'foo/bar'],
             [u'foobar', u'foo.bar', u'foo/foobar', u'foo/f.bar',
              u'.bar', u'foo/.bar']),
            # check that we ignore extra *s, so *** is treated like ** not *.
            (u'foo/***/x',
             [u'foo/x', u'foo/bar/x'],
             [u'foox', u'foo/bax', u'foo/.x', u'foo/bar/bax']),
            (u'***/bar',
             [u'bar', u'foo/bar'],
             [u'foobar', u'foo.bar', u'foo/foobar', u'foo/f.bar',
              u'.bar', u'foo/.bar']),
            # the remaining tests check that ** is interpreted as *
            # unless it is a whole path component
            (u'x**/x',
             [u'x\u8336/x', u'x/x'],
             [u'xx', u'x.x', u'bar/x/bar/x', u'x.y.x', u'x/y/x']),
            (u'x**x',
             [u'xx', u'x.x', u'x\u8336..x', u'foo/x.x', u'x.y.x'],
             [u'bar/x/bar/x', u'xfoo/bar/x', u'x/x', u'bax/abaxab']),
            (u'foo/**x',
             [u'foo/x', u'foo/bax', u'foo/a.x', u'foo/.x', u'foo/.q.x'],
             [u'foo/bar/bax']),
            (u'f**',
             [u'foo', u'foo.bar'],
             [u'.foo', u'foo/bar', u'foo/.bar']),
            # u'foo\\nbar' is a literal backslash followed by 'n', not a
            # newline.
            (u'**bar',
             [u'bar', u'foobar', u'foo\\nbar', u'foo.bar', u'foo/bar',
              u'foo/foobar', u'foo/f.bar', u'.bar', u'foo/.bar'],
             []),
            ])

    def test_leading_dot_slash(self):
        """Check that a leading './' restricts a glob to top-level names."""
        self.assertMatch([
            (u'./foo',
             [u'foo'],
             [u'\u8336/foo', u'barfoo', u'x/y/foo']),
            (u'./f*',
             [u'foo'],
             [u'foo/bar', u'foo/.bar', u'x/foo/y']),
            ])

    def test_backslash(self):
        """Check that backslashes in a glob behave like forward slashes."""
        self.assertMatch([
            (u'.\\foo',
             [u'foo'],
             [u'\u8336/foo', u'barfoo', u'x/y/foo']),
            (u'.\\f*',
             [u'foo'],
             [u'foo/bar', u'foo/.bar', u'x/foo/y']),
            (u'foo\\**\\x',
             [u'foo/x', u'foo/bar/x'],
             [u'foox', u'foo/bax', u'foo/.x', u'foo/bar/bax']),
            ])

    def test_trailing_slash(self):
        """Check that a trailing slash or backslash on a glob is ignored."""
        self.assertMatch([
            (u'./foo/',
             [u'foo'],
             [u'\u8336/foo', u'barfoo', u'x/y/foo']),
            (u'.\\foo\\',
             [u'foo'],
             [u'foo/', u'\u8336/foo', u'barfoo', u'x/y/foo']),
            ])

    def test_leading_asterisk_dot(self):
        """Check extension-style globs such as '*.x' and '*.~*'."""
        self.assertMatch([
            (u'*.x',
             [u'foo/bar/baz.x', u'\u8336/Q.x', u'foo.y.x', u'.foo.x',
              u'bar/.foo.x', u'.x'],
             [u'foo.x.y']),
            (u'foo/*.bar',
             [u'foo/b.bar', u'foo/a.b.bar', u'foo/.bar'],
             [u'foo/bar']),
            (u'*.~*',
             [u'foo.py.~1~', u'.foo.py.~1~'],
             []),
            ])

    def test_end_anchor(self):
        """Check that an extension glob must match to the end of the name."""
        self.assertMatch([
            (u'*.333',
             [u'foo.333'],
             [u'foo.3']),
            (u'*.3',
             [u'foo.3'],
             [u'foo.333']),
            ])

    def test_mixed_globs(self):
        """tests handling of combinations of path type matches.

        The types being extension, basename and full path.
        """
        patterns = [u'*.foo', u'.*.swp', u'./*.png']
        globster = Globster(patterns)
        self.assertEqual(u'*.foo', globster.match('bar.foo'))
        self.assertEqual(u'./*.png', globster.match('foo.png'))
        self.assertEqual(None, globster.match('foo/bar.png'))
        self.assertEqual(u'.*.swp', globster.match('foo/.bar.py.swp'))

    def test_large_globset(self):
        """tests that the globster can handle a large set of patterns.

        Large is defined as more than supported by python regex groups,
        i.e. 99.
        This test assumes the globs are broken into regexs containing 99
        groups.
        """
        patterns = [u'*.%03d' % i for i in range(300)]
        globster = Globster(patterns)
        # test the fence posts
        for x in (0, 98, 99, 197, 198, 296, 297, 299):
            filename = u'foo.%03d' % x
            self.assertEqual(patterns[x], globster.match(filename))
        self.assertEqual(None, globster.match('foobar.300'))

    def test_bad_pattern(self):
        """Ensure that globster handles bad patterns cleanly."""
        patterns = [u'RE:[', u'/home/foo', u'RE:*.cpp']
        g = Globster(patterns)
        # Constructing the Globster succeeds; the error surfaces on match().
        e = self.assertRaises(errors.InvalidPattern, g.match, 'filename')
        # Both invalid patterns are expected to be named in the message.
        self.assertContainsRe(e.msg,
            r"File.*ignore.*contains error.*RE:\[.*RE:\*\.cpp",
            flags=re.DOTALL)


class TestExceptionGlobster(TestCase):
    """Tests for ExceptionGlobster, which understands '!' and '!!' globs."""

    def test_exclusion_patterns(self):
        """test that exception patterns are not matched"""
        patterns = [u'*', u'!./local', u'!./local/**/*', u'!RE:\\.z.*',
                    u'!!./.zcompdump']
        globster = ExceptionGlobster(patterns)
        self.assertEqual(u'*', globster.match('tmp/foo.txt'))
        self.assertEqual(None, globster.match('local'))
        self.assertEqual(None, globster.match('local/bin/wombat'))
        self.assertEqual(None, globster.match('.zshrc'))
        self.assertEqual(None, globster.match('.zfunctions/fiddle/flam'))
        self.assertEqual(u'!!./.zcompdump', globster.match('.zcompdump'))

    def test_exclusion_order(self):
        """test that ordering of exclusion patterns does not matter"""
        patterns = [u'static/**/*.html', u'!static/**/versionable.html']
        globster = ExceptionGlobster(patterns)
        self.assertEqual(u'static/**/*.html',
            globster.match('static/foo.html'))
        self.assertEqual(None, globster.match('static/versionable.html'))
        self.assertEqual(None, globster.match('static/bar/versionable.html'))
        globster = ExceptionGlobster(reversed(patterns))
        self.assertEqual(u'static/**/*.html',
            globster.match('static/foo.html'))
        self.assertEqual(None, globster.match('static/versionable.html'))
        self.assertEqual(None, globster.match('static/bar/versionable.html'))


class TestOrderedGlobster(TestCase):
    """Tests for _OrderedGlobster, where pattern order decides the match."""

    def test_ordered_globs(self):
        """test that the first match in a list is the one found"""
        patterns = [u'*.foo', u'bar.*']
        globster = _OrderedGlobster(patterns)
        self.assertEqual(u'*.foo', globster.match('bar.foo'))
        self.assertEqual(None, globster.match('foo.bar'))
        globster = _OrderedGlobster(reversed(patterns))
        self.assertEqual(u'bar.*', globster.match('bar.foo'))
        self.assertEqual(None, globster.match('foo.bar'))


class TestNormalizePattern(TestCase):
    """Tests for normalize_pattern()'s handling of slashes."""

    def test_backslashes(self):
        """tests that backslashes are converted to forward slashes, multiple
        backslashes are collapsed to single forward slashes and trailing
        backslashes are removed"""
        self.assertEqual(u'/', normalize_pattern(u'\\'))
        self.assertEqual(u'/', normalize_pattern(u'\\\\'))
        self.assertEqual(u'/foo/bar', normalize_pattern(u'\\foo\\bar'))
        self.assertEqual(u'foo/bar', normalize_pattern(u'foo\\bar\\'))
        self.assertEqual(u'/foo/bar',
            normalize_pattern(u'\\\\foo\\\\bar\\\\'))

    def test_forward_slashes(self):
        """tests that multiple forward slashes are collapsed to single forward
        slashes and trailing forward slashes are removed"""
        self.assertEqual(u'/', normalize_pattern(u'/'))
        self.assertEqual(u'/', normalize_pattern(u'//'))
        self.assertEqual(u'/foo/bar', normalize_pattern(u'/foo/bar'))
        self.assertEqual(u'foo/bar', normalize_pattern(u'foo/bar/'))
        self.assertEqual(u'/foo/bar', normalize_pattern(u'//foo//bar//'))

    def test_mixed_slashes(self):
        """tests that multiple mixed slashes are collapsed to single forward
        slashes and trailing mixed slashes are removed"""
        self.assertEqual(u'/foo/bar',
            normalize_pattern(u'\\/\\foo//\\///bar/\\\\/'))