summaryrefslogtreecommitdiff
path: root/git-p4.py
diff options
context:
space:
mode:
authorYang Zhao <yang.zhao@skyboxlabs.com>2019-12-13 15:52:40 -0800
committerJunio C Hamano <gitster@pobox.com>2020-01-15 12:53:40 -0800
commitd38208a297e76bbfbfa8e485632c217aaafa9486 (patch)
tree4f53244e4919935f4a03b951e879162a0f86ba57 /git-p4.py
parent86dca24b7be6491fb8d4b7a499ccf4a43776d1ab (diff)
downloadgit-d38208a297e76bbfbfa8e485632c217aaafa9486.tar.gz
git-p4: convert path to unicode before processing them
P4 allows essentially arbitrary encoding for path data while we would perfer to be dealing only with unicode strings. Since path data need to survive round-trip back to p4, this patch implements the general policy that we store path data as-is, but decode them to unicode before doing any non-trivial processing. A new `decode_path()` method is provided that generally does the correct conversion, taking into account `git-p4.pathEncoding` configuration. For python2.7, path strings will be left as-is if it only contains ASCII characters. For python3, decoding is always done so that we have str objects. Signed-off-by: Yang Zhao <yang.zhao@skyboxlabs.com> Reviewed-by: Ben Keene <seraphire@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'git-p4.py')
-rwxr-xr-xgit-p4.py69
1 files changed, 44 insertions, 25 deletions
diff --git a/git-p4.py b/git-p4.py
index d62fb05989..d43f373b2d 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -150,6 +150,21 @@ else:
def encode_text_stream(s):
return s.encode('utf_8') if isinstance(s, unicode) else s
+def decode_path(path):
+ """Decode a given string (bytes or otherwise) using configured path encoding options
+ """
+ encoding = gitConfig('git-p4.pathEncoding') or 'utf_8'
+ if bytes is not str:
+ return path.decode(encoding, errors='replace') if isinstance(path, bytes) else path
+ else:
+ try:
+ path.decode('ascii')
+ except:
+ path = path.decode(encoding, errors='replace')
+ if verbose:
+ print('Path with non-ASCII characters detected. Used {} to decode: {}'.format(encoding, path))
+ return path
+
def write_pipe(c, stdin):
if verbose:
sys.stderr.write('Writing pipe: %s\n' % str(c))
@@ -697,7 +712,8 @@ def p4Where(depotPath):
if "depotFile" in entry:
# Search for the base client side depot path, as long as it starts with the branch's P4 path.
# The base path always ends with "/...".
- if entry["depotFile"].find(depotPath) == 0 and entry["depotFile"][-4:] == "/...":
+ entry_path = decode_path(entry['depotFile'])
+ if entry_path.find(depotPath) == 0 and entry_path[-4:] == "/...":
output = entry
break
elif "data" in entry:
@@ -712,11 +728,11 @@ def p4Where(depotPath):
return ""
clientPath = ""
if "path" in output:
- clientPath = output.get("path")
+ clientPath = decode_path(output['path'])
elif "data" in output:
data = output.get("data")
- lastSpace = data.rfind(" ")
- clientPath = data[lastSpace + 1:]
+ lastSpace = data.rfind(b" ")
+ clientPath = decode_path(data[lastSpace + 1:])
if clientPath.endswith("..."):
clientPath = clientPath[:-3]
@@ -2484,7 +2500,7 @@ class View(object):
def convert_client_path(self, clientFile):
# chop off //client/ part to make it relative
- if not clientFile.startswith(self.client_prefix):
+ if not decode_path(clientFile).startswith(self.client_prefix):
die("No prefix '%s' on clientFile '%s'" %
(self.client_prefix, clientFile))
return clientFile[len(self.client_prefix):]
@@ -2493,7 +2509,7 @@ class View(object):
""" Caching file paths by "p4 where" batch query """
# List depot file paths exclude that already cached
- fileArgs = [f['path'] for f in files if f['path'] not in self.client_spec_path_cache]
+ fileArgs = [f['path'] for f in files if decode_path(f['path']) not in self.client_spec_path_cache]
if len(fileArgs) == 0:
return # All files in cache
@@ -2508,16 +2524,18 @@ class View(object):
if "unmap" in res:
# it will list all of them, but only one not unmap-ped
continue
+ depot_path = decode_path(res['depotFile'])
if gitConfigBool("core.ignorecase"):
- res['depotFile'] = res['depotFile'].lower()
- self.client_spec_path_cache[res['depotFile']] = self.convert_client_path(res["clientFile"])
+ depot_path = depot_path.lower()
+ self.client_spec_path_cache[depot_path] = self.convert_client_path(res["clientFile"])
# not found files or unmap files set to ""
for depotFile in fileArgs:
+ depotFile = decode_path(depotFile)
if gitConfigBool("core.ignorecase"):
depotFile = depotFile.lower()
if depotFile not in self.client_spec_path_cache:
- self.client_spec_path_cache[depotFile] = ""
+ self.client_spec_path_cache[depotFile] = b''
def map_in_client(self, depot_path):
"""Return the relative location in the client where this
@@ -2635,7 +2653,7 @@ class P4Sync(Command, P4UserMap):
elif path.lower() == p.lower():
return False
for p in self.depotPaths:
- if p4PathStartsWith(path, p):
+ if p4PathStartsWith(path, decode_path(p)):
return True
return False
@@ -2644,7 +2662,7 @@ class P4Sync(Command, P4UserMap):
fnum = 0
while "depotFile%s" % fnum in commit:
path = commit["depotFile%s" % fnum]
- found = self.isPathWanted(path)
+ found = self.isPathWanted(decode_path(path))
if not found:
fnum = fnum + 1
continue
@@ -2678,7 +2696,7 @@ class P4Sync(Command, P4UserMap):
if self.useClientSpec:
# branch detection moves files up a level (the branch name)
# from what client spec interpretation gives
- path = self.clientSpecDirs.map_in_client(path)
+ path = decode_path(self.clientSpecDirs.map_in_client(path))
if self.detectBranches:
for b in self.knownBranches:
if p4PathStartsWith(path, b + "/"):
@@ -2712,14 +2730,15 @@ class P4Sync(Command, P4UserMap):
branches = {}
fnum = 0
while "depotFile%s" % fnum in commit:
- path = commit["depotFile%s" % fnum]
+ raw_path = commit["depotFile%s" % fnum]
+ path = decode_path(raw_path)
found = self.isPathWanted(path)
if not found:
fnum = fnum + 1
continue
file = {}
- file["path"] = path
+ file["path"] = raw_path
file["rev"] = commit["rev%s" % fnum]
file["action"] = commit["action%s" % fnum]
file["type"] = commit["type%s" % fnum]
@@ -2728,7 +2747,7 @@ class P4Sync(Command, P4UserMap):
# start with the full relative path where this file would
# go in a p4 client
if self.useClientSpec:
- relPath = self.clientSpecDirs.map_in_client(path)
+ relPath = decode_path(self.clientSpecDirs.map_in_client(path))
else:
relPath = self.stripRepoPath(path, self.depotPaths)
@@ -2766,14 +2785,15 @@ class P4Sync(Command, P4UserMap):
# - helper for streamP4Files
def streamOneP4File(self, file, contents):
- relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes)
- relPath = self.encodeWithUTF8(relPath)
+ file_path = file['depotFile']
+ relPath = self.stripRepoPath(decode_path(file_path), self.branchPrefixes)
+
if verbose:
if 'fileSize' in self.stream_file:
size = int(self.stream_file['fileSize'])
else:
size = 0 # deleted files don't get a fileSize apparently
- sys.stdout.write('\r%s --> %s (%i MB)\n' % (file['depotFile'], relPath, size/1024/1024))
+ sys.stdout.write('\r%s --> %s (%i MB)\n' % (file_path, relPath, size/1024/1024))
sys.stdout.flush()
(type_base, type_mods) = split_p4_type(file["type"])
@@ -2791,7 +2811,7 @@ class P4Sync(Command, P4UserMap):
# to nothing. This causes p4 errors when checking out such
# a change, and errors here too. Work around it by ignoring
# the bad symlink; hopefully a future change fixes it.
- print("\nIgnoring empty symlink in %s" % file['depotFile'])
+ print("\nIgnoring empty symlink in %s" % file_path)
return
elif data[-1] == '\n':
contents = [data[:-1]]
@@ -2810,7 +2830,7 @@ class P4Sync(Command, P4UserMap):
# just the native "NT" type.
#
try:
- text = p4_read_pipe(['print', '-q', '-o', '-', '%s@%s' % (file['depotFile'], file['change'])])
+ text = p4_read_pipe(['print', '-q', '-o', '-', '%s@%s' % (decode_path(file['depotFile']), file['change'])], raw=True)
except Exception as e:
if 'Translation of file content failed' in str(e):
type_base = 'binary'
@@ -2818,7 +2838,7 @@ class P4Sync(Command, P4UserMap):
raise e
else:
if p4_version_string().find('/NT') >= 0:
- text = text.replace('\r\n', '\n')
+ text = text.replace(b'\r\n', b'\n')
contents = [ text ]
if type_base == "apple":
@@ -2849,8 +2869,7 @@ class P4Sync(Command, P4UserMap):
self.writeToGitStream(git_mode, relPath, contents)
def streamOneP4Deletion(self, file):
- relPath = self.stripRepoPath(file['path'], self.branchPrefixes)
- relPath = self.encodeWithUTF8(relPath)
+ relPath = self.stripRepoPath(decode_path(file['path']), self.branchPrefixes)
if verbose:
sys.stdout.write("delete %s\n" % relPath)
sys.stdout.flush()
@@ -3037,8 +3056,8 @@ class P4Sync(Command, P4UserMap):
if self.clientSpecDirs:
self.clientSpecDirs.update_client_spec_path_cache(files)
- files = [f for f in files
- if self.inClientSpec(f['path']) and self.hasBranchPrefix(f['path'])]
+ files = [f for (f, path) in ((f, decode_path(f['path'])) for f in files)
+ if self.inClientSpec(path) and self.hasBranchPrefix(path)]
if gitConfigBool('git-p4.keepEmptyCommits'):
allow_empty = True