1 files changed, 166 insertions, 0 deletions
diff --git a/bzrlib/cmdline.py b/bzrlib/cmdline.py
new file mode 100644
index 0000000..2cb4983
--- /dev/null
+++ b/bzrlib/cmdline.py
@@ -0,0 +1,166 @@
+# Copyright (C) 2010-2011 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Unicode-compatible command-line splitter for all platforms.
+
+The user-visible behaviour of this module is described in
+configuring_bazaar.txt.
+"""
+
+from __future__ import absolute_import
+
+import re
+
+
+# Matcher for a single whitespace character.  re.UNICODE makes \s cover
+# non-ASCII whitespace too.  The backslash is doubled so that the pattern
+# does not depend on an unrecognised string escape being passed through
+# unchanged.
+_whitespace_match = re.compile(u'\\s', re.UNICODE).match
+
+
+class _PushbackSequence(object):
+    """Iterator over an iterable that allows items to be pushed back.
+
+    Items given to pushback() are returned again, most recently pushed
+    first, before iteration of the wrapped iterable resumes.
+    """
+
+    def __init__(self, orig):
+        """Wrap ``orig``.
+
+        :param orig: Any iterable; it is consumed lazily via iter().
+        """
+        self._iter = iter(orig)
+        self._pushback_buffer = []
+
+    def next(self):
+        """Return the next item, preferring pushed-back items.
+
+        :raises StopIteration: When nothing has been pushed back and the
+            wrapped iterator is exhausted.
+        """
+        if self._pushback_buffer:
+            return self._pushback_buffer.pop()
+        # The next() builtin works with both the .next() and the
+        # .__next__() spelling of the iterator protocol.
+        return next(self._iter)
+
+    # Alias so the object is also an iterator where the protocol method is
+    # spelled __next__.
+    __next__ = next
+
+    def pushback(self, char):
+        """Queue ``char`` so that the following next() call returns it."""
+        self._pushback_buffer.append(char)
+
+    def __iter__(self):
+        return self
+
+
+class _Whitespace(object):
+    """Splitter state used before a word has started."""
+
+    def process(self, next_char, context):
+        """Consume ``next_char`` and return the state for the next one.
+
+        :param next_char: The character to handle.
+        :param context: Object carrying ``token``, ``quoted`` and
+            ``allowed_quote_chars``.
+        :return: The next state object, or None to end the current token.
+        """
+        if _whitespace_match(next_char):
+            # Whitespace ends the token only if something was collected;
+            # otherwise it is skipped.
+            return None if context.token else self
+        if next_char in context.allowed_quote_chars:
+            context.quoted = True
+            return _Quotes(next_char, self)
+        if next_char == u'\\':
+            return _Backslash(self)
+        # Any other character starts a plain word.
+        context.token.append(next_char)
+        return _Word()
+
+
+class _Quotes(object):
+    """Splitter state for characters inside a quoted section."""
+
+    def __init__(self, quote_char, exit_state):
+        """Remember the opening quote and the state to resume afterwards.
+
+        :param quote_char: The quote character that opened this section.
+        :param exit_state: State returned once the matching quote is seen.
+        """
+        self.exit_state = exit_state
+        self.quote_char = quote_char
+
+    def process(self, next_char, context):
+        """Consume ``next_char`` and return the state for the next one."""
+        if next_char == u'\\':
+            following = _Backslash(self)
+        elif next_char != self.quote_char:
+            # Ordinary character (including the other kind of quote).
+            context.token.append(next_char)
+            following = self
+        else:
+            # Matching quote: leave the quoted section.
+            following = self.exit_state
+        return following
+
+
+class _Backslash(object):
+    """Splitter state that counts a run of consecutive backslashes.
+
+    How the run is emitted depends on the character that ends it; see
+    http://msdn.microsoft.com/en-us/library/bb776391(VS.85).aspx
+    """
+
+    def __init__(self, exit_state):
+        """Start a run of one backslash.
+
+        :param exit_state: State to return to once the run has ended.
+        """
+        self.exit_state = exit_state
+        self.count = 1
+
+    def process(self, next_char, context):
+        """Consume ``next_char`` and return the state for the next one.
+
+        :param next_char: The character following the backslashes seen so
+            far.
+        :param context: Object carrying ``token``, ``allowed_quote_chars``
+            and ``seq`` (which must provide ``pushback``).
+        :return: This state while the run continues, else ``exit_state``.
+        """
+        if next_char == u'\\':
+            self.count += 1
+            return self
+        elif next_char in context.allowed_quote_chars:
+            # 2N backslashes followed by a quote are N backslashes.
+            # Floor division keeps the repeat count an integer even where
+            # "/" means true division.
+            context.token.append(u'\\' * (self.count // 2))
+            # 2N+1 backslashes followed by a quote are N backslashes
+            # followed by the quote which should not be processed as the
+            # start or end of the quoted arg
+            if self.count % 2 == 1:
+                # odd number of \ escapes the quote
+                context.token.append(next_char)
+            else:
+                # let exit_state handle next_char
+                context.seq.pushback(next_char)
+            self.count = 0
+            return self.exit_state
+        else:
+            # N backslashes not followed by a quote are just N backslashes
+            if self.count > 0:
+                context.token.append(u'\\' * self.count)
+                self.count = 0
+            # let exit_state handle next_char
+            context.seq.pushback(next_char)
+            return self.exit_state
+
+    def finish(self, context):
+        """Emit the pending backslashes when the input ends mid-run."""
+        if self.count > 0:
+            context.token.append(u'\\' * self.count)
+
+
+class _Word(object):
+    """Splitter state for characters of an unquoted word."""
+
+    def process(self, next_char, context):
+        """Consume ``next_char`` and return the state for the next one.
+
+        :return: The next state object, or None when whitespace ends the
+            word.
+        """
+        if _whitespace_match(next_char):
+            return None
+        if next_char in context.allowed_quote_chars:
+            # A quoted section may start in the middle of a word; come
+            # back to this state when it closes.
+            return _Quotes(next_char, self)
+        if next_char == u'\\':
+            return _Backslash(self)
+        context.token.append(next_char)
+        return self
+
+
+class Splitter(object):
+    """Iterator yielding ``(quoted, token)`` pairs from a command line.
+
+    The instance doubles as the context handed to the state objects: they
+    read ``allowed_quote_chars`` and ``seq`` and update ``token`` and
+    ``quoted``.
+    """
+
+    def __init__(self, command_line, single_quotes_allowed):
+        """Prepare to split ``command_line``.
+
+        :param command_line: Iterable of characters to split.
+        :param single_quotes_allowed: If true, ' is treated as a quote
+            character in addition to ".
+        """
+        self.seq = _PushbackSequence(command_line)
+        self.allowed_quote_chars = u'"'
+        if single_quotes_allowed:
+            self.allowed_quote_chars += u"'"
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        """Return the next ``(quoted, token)`` pair.
+
+        :raises StopIteration: When no further token is available.
+        """
+        quoted, token = self._get_token()
+        if token is None:
+            raise StopIteration
+        return quoted, token
+
+    # Alias so the object is also an iterator where the protocol method is
+    # spelled __next__.
+    __next__ = next
+
+    def _get_token(self):
+        """Read one token from ``self.seq``.
+
+        :return: ``(quoted, token)``; ``token`` is None when nothing was
+            collected and no quote was seen.
+        """
+        self.quoted = False
+        self.token = []
+        state = _Whitespace()
+        for next_char in self.seq:
+            state = state.process(next_char, self)
+            if state is None:
+                # The state machine signalled the end of this token.
+                break
+        if state is not None:
+            # Input ran out mid-token; let the state flush pending output
+            # if it provides a finish() hook.
+            finish = getattr(state, 'finish', None)
+            if finish is not None:
+                finish(self)
+        result = u''.join(self.token)
+        # An empty token is only meaningful if it was explicitly quoted.
+        if not self.quoted and not result:
+            result = None
+        return self.quoted, result
+
+
+def split(unsplit, single_quotes_allowed=True):
+    """Split ``unsplit`` into a list of argument strings.
+
+    :param unsplit: The command line to split.
+    :param single_quotes_allowed: Passed through to Splitter.
+    :return: The tokens produced by Splitter, without the quoted flags.
+    """
+    tokens = []
+    for pair in Splitter(unsplit,
+                         single_quotes_allowed=single_quotes_allowed):
+        # Each pair is (quoted, token); only the token text is kept.
+        tokens.append(pair[1])
+    return tokens