diff options
Diffstat (limited to 'bzrlib/cmdline.py')
-rw-r--r-- | bzrlib/cmdline.py | 166 |
1 files changed, 166 insertions, 0 deletions
diff --git a/bzrlib/cmdline.py b/bzrlib/cmdline.py new file mode 100644 index 0000000..2cb4983 --- /dev/null +++ b/bzrlib/cmdline.py @@ -0,0 +1,166 @@ +# Copyright (C) 2010-2011 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +"""Unicode-compatible command-line splitter for all platforms. + +The user-visible behaviour of this module is described in +configuring_bazaar.txt. +""" + +from __future__ import absolute_import + +import re + + +_whitespace_match = re.compile(u'\s', re.UNICODE).match + + +class _PushbackSequence(object): + def __init__(self, orig): + self._iter = iter(orig) + self._pushback_buffer = [] + + def next(self): + if len(self._pushback_buffer) > 0: + return self._pushback_buffer.pop() + else: + return self._iter.next() + + def pushback(self, char): + self._pushback_buffer.append(char) + + def __iter__(self): + return self + + +class _Whitespace(object): + def process(self, next_char, context): + if _whitespace_match(next_char): + if len(context.token) > 0: + return None + else: + return self + elif next_char in context.allowed_quote_chars: + context.quoted = True + return _Quotes(next_char, self) + elif next_char == u'\\': + return _Backslash(self) + else: + context.token.append(next_char) + return _Word() + + +class _Quotes(object): + def __init__(self, quote_char, exit_state): + self.quote_char = quote_char + self.exit_state = exit_state + + def process(self, next_char, context): + if next_char == u'\\': + return _Backslash(self) + elif next_char == self.quote_char: + return self.exit_state + else: + context.token.append(next_char) + return self + + +class _Backslash(object): + # See http://msdn.microsoft.com/en-us/library/bb776391(VS.85).aspx + def __init__(self, exit_state): + self.exit_state = exit_state + self.count = 1 + + def process(self, next_char, context): + if next_char == u'\\': + self.count += 1 + return self + elif next_char in context.allowed_quote_chars: + # 2N backslashes followed by a quote are N backslashes + context.token.append(u'\\' * (self.count/2)) + # 2N+1 backslashes follwed by a quote are N backslashes followed by + # the quote which should not be processed as the start or end of + # the quoted arg + if self.count % 2 == 1: + # odd number of \ escapes the quote + context.token.append(next_char) + else: + # let exit_state handle next_char + context.seq.pushback(next_char) + self.count = 0 + return self.exit_state + else: + # N backslashes not followed by a quote are just N backslashes + if self.count > 0: + context.token.append(u'\\' * self.count) + self.count = 0 + # let exit_state handle next_char + context.seq.pushback(next_char) + return self.exit_state + + def finish(self, context): + if self.count > 0: + context.token.append(u'\\' * self.count) + + +class _Word(object): + def process(self, next_char, context): + if _whitespace_match(next_char): + return None + elif next_char in context.allowed_quote_chars: + return _Quotes(next_char, self) + elif next_char == u'\\': + return _Backslash(self) + else: + context.token.append(next_char) + return self + + +class Splitter(object): + def __init__(self, command_line, single_quotes_allowed): + self.seq = _PushbackSequence(command_line) + self.allowed_quote_chars = u'"' + if single_quotes_allowed: + self.allowed_quote_chars += u"'" + + def __iter__(self): + return self + + def next(self): + quoted, token = self._get_token() + if token is None: + raise StopIteration + return quoted, token + + def _get_token(self): + self.quoted = False + self.token = [] + state = _Whitespace() + for next_char in self.seq: + state = state.process(next_char, self) + if state is None: + break + if not state is None and not getattr(state, 'finish', None) is None: + state.finish(self) + result = u''.join(self.token) + if not self.quoted and result == '': + result = None + return self.quoted, result + + +def split(unsplit, single_quotes_allowed=True): + splitter = Splitter(unsplit, single_quotes_allowed=single_quotes_allowed) + return [arg for quoted, arg in splitter] |