summary | refs | log | tree | commit | diff
path: root/bzrlib/cmdline.py
diff options
context:
space:
mode:
Diffstat (limited to 'bzrlib/cmdline.py')
-rw-r--r-- bzrlib/cmdline.py 166
1 files changed, 166 insertions, 0 deletions
diff --git a/bzrlib/cmdline.py b/bzrlib/cmdline.py
new file mode 100644
index 0000000..2cb4983
--- /dev/null
+++ b/bzrlib/cmdline.py
@@ -0,0 +1,166 @@
+# Copyright (C) 2010-2011 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Unicode-compatible command-line splitter for all platforms.
+
+The user-visible behaviour of this module is described in
+configuring_bazaar.txt.
+"""
+
+from __future__ import absolute_import
+
+import re
+
+
# Matcher that succeeds when a string starts with a whitespace character
# (Unicode-aware).  The backslash is doubled so the literal does not rely on
# the invalid string escape "\s", which newer Python 3 releases warn about;
# the resulting pattern text (backslash followed by "s") is unchanged.
_whitespace_match = re.compile(u'\\s', re.UNICODE).match
+
+
class _PushbackSequence(object):
    """Iterator over a sequence that lets items be pushed back.

    Items handed to pushback() are returned first (most recently pushed
    item first); after that, iteration continues on the wrapped sequence.
    """

    def __init__(self, orig):
        """Wrap the iterable *orig*."""
        self._iter = iter(orig)
        self._pushback_buffer = []

    def __next__(self):
        """Return the next item, preferring pushed-back items.

        Raises StopIteration when the pushback buffer is empty and the
        wrapped iterator is exhausted.
        """
        if self._pushback_buffer:
            return self._pushback_buffer.pop()
        # The next() builtin works whichever iterator protocol the wrapped
        # iterator implements, unlike calling its .next() method directly.
        return next(self._iter)

    # Python 2 spelling of the iterator protocol, kept for existing callers.
    next = __next__

    def pushback(self, char):
        """Queue *char* so that it is the next item returned."""
        self._pushback_buffer.append(char)

    def __iter__(self):
        return self
+
+
class _Whitespace(object):
    """Splitter state used between tokens (and as the initial state)."""

    def process(self, next_char, context):
        """Consume *next_char* and return the state to use next.

        Returning None tells the caller that the current token is complete.
        """
        if _whitespace_match(next_char):
            # Whitespace ends a token only if something was collected;
            # otherwise it is just skipped.
            return None if context.token else self
        if next_char in context.allowed_quote_chars:
            # Record that a quote was opened from this state.
            context.quoted = True
            return _Quotes(next_char, self)
        if next_char == u'\\':
            return _Backslash(self)
        # Any other character starts a plain word.
        context.token.append(next_char)
        return _Word()
+
+
class _Quotes(object):
    """Splitter state used while inside a quoted section."""

    def __init__(self, quote_char, exit_state):
        """Remember the opening quote and the state to resume afterwards."""
        self.exit_state = exit_state
        self.quote_char = quote_char

    def process(self, next_char, context):
        """Consume *next_char* and return the state to use next."""
        if next_char == u'\\':
            # Backslash handling resumes this quoted state when done.
            return _Backslash(self)
        if next_char == self.quote_char:
            # Matching quote closes the section; the quote itself is dropped.
            return self.exit_state
        context.token.append(next_char)
        return self
+
+
class _Backslash(object):
    """Splitter state that collects a run of backslashes.

    The run is interpreted once a non-backslash character arrives:

    * before a quote character, 2N backslashes yield N backslashes and the
      quote is handed back to the previous state for normal processing;
    * before a quote character, 2N+1 backslashes yield N backslashes
      followed by a literal quote;
    * before any other character, N backslashes yield N backslashes and the
      character is handed back to the previous state.
    """
    # See http://msdn.microsoft.com/en-us/library/bb776391(VS.85).aspx

    def __init__(self, exit_state):
        """Start a run of one backslash; *exit_state* resumes afterwards."""
        self.exit_state = exit_state
        self.count = 1

    def process(self, next_char, context):
        """Consume *next_char* and return the state to use next."""
        if next_char == u'\\':
            self.count += 1
            return self
        elif next_char in context.allowed_quote_chars:
            # 2N backslashes followed by a quote are N backslashes.
            # Floor division keeps the repeat count an integer even where
            # "/" performs true division.
            context.token.append(u'\\' * (self.count // 2))
            # 2N+1 backslashes followed by a quote are N backslashes followed
            # by the quote which should not be processed as the start or end
            # of the quoted arg
            if self.count % 2 == 1:
                # odd number of \ escapes the quote
                context.token.append(next_char)
            else:
                # let exit_state handle next_char
                context.seq.pushback(next_char)
            self.count = 0
            return self.exit_state
        else:
            # N backslashes not followed by a quote are just N backslashes
            if self.count > 0:
                context.token.append(u'\\' * self.count)
                self.count = 0
            # let exit_state handle next_char
            context.seq.pushback(next_char)
            return self.exit_state

    def finish(self, context):
        """Flush pending backslashes when input ends in this state."""
        if self.count > 0:
            context.token.append(u'\\' * self.count)
+
+
class _Word(object):
    """Splitter state used while inside an unquoted word."""

    def process(self, next_char, context):
        """Consume *next_char* and return the state to use next.

        Returning None tells the caller that the current token is complete.
        """
        if _whitespace_match(next_char):
            # Whitespace terminates the word.
            return None
        if next_char in context.allowed_quote_chars:
            # A quote inside a word opens a quoted section that returns here.
            return _Quotes(next_char, self)
        if next_char == u'\\':
            return _Backslash(self)
        context.token.append(next_char)
        return self
+
+
class Splitter(object):
    """Iterator that splits a command line into ``(quoted, token)`` pairs.

    The splitter doubles as the context object passed to the state objects:
    they read ``allowed_quote_chars`` and ``seq`` and update ``token`` and
    ``quoted`` while characters are processed.
    """

    def __init__(self, command_line, single_quotes_allowed):
        """Prepare to split *command_line*.

        Double quotes are always quote characters; single quotes are added
        when *single_quotes_allowed* is true.
        """
        self.seq = _PushbackSequence(command_line)
        self.allowed_quote_chars = u'"'
        if single_quotes_allowed:
            self.allowed_quote_chars += u"'"

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next ``(quoted, token)`` pair.

        Raises StopIteration when no further token can be produced.
        """
        quoted, token = self._get_token()
        if token is None:
            raise StopIteration
        return quoted, token

    # Python 2 spelling of the iterator protocol, kept for existing callers.
    next = __next__

    def _get_token(self):
        """Read characters until one token is complete.

        Returns ``(quoted, token)``; ``token`` is None when the result is
        empty and no quote was seen.
        """
        self.quoted = False
        self.token = []
        state = _Whitespace()
        for next_char in self.seq:
            state = state.process(next_char, self)
            # A state returns None to signal the end of the token.
            if state is None:
                break
        if state is not None:
            # Input ran out mid-token: let the state flush pending data if
            # it knows how to (only some states define finish()).
            finish = getattr(state, 'finish', None)
            if finish is not None:
                finish(self)
        result = u''.join(self.token)
        # An empty result is only a real token when it was quoted.
        if not self.quoted and result == '':
            result = None
        return self.quoted, result
+
+
def split(unsplit, single_quotes_allowed=True):
    """Split the command line *unsplit* into a list of argument strings.

    Single quotes are treated as quote characters only when
    *single_quotes_allowed* is true.  The per-token quoted flag produced by
    Splitter is discarded.
    """
    tokens = []
    for _quoted, token in Splitter(
            unsplit, single_quotes_allowed=single_quotes_allowed):
        tokens.append(token)
    return tokens