From 9ee861ae7a7b36a811aa4b5cc8172c5cbd6a945b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 5 Nov 2009 17:59:30 +0100 Subject: Added utilities helping to create proper paths either with slashes or backslashes depending on the operating system fixed test_refs and test_trees Many more issues remain though, this is just a first backup commit --- lib/git/index.py | 6 +-- lib/git/objects/base.py | 8 ++-- lib/git/objects/tree.py | 3 +- lib/git/refs.py | 12 +++--- lib/git/utils.py | 105 +++++++++++++++++++++++++++++++----------------- 5 files changed, 85 insertions(+), 49 deletions(-) (limited to 'lib/git') diff --git a/lib/git/index.py b/lib/git/index.py index e368f531..747ccc88 100644 --- a/lib/git/index.py +++ b/lib/git/index.py @@ -19,7 +19,7 @@ import subprocess import git.diff as diff from git.objects import Blob, Tree, Object, Commit -from git.utils import SHA1Writer, LazyMixin, ConcurrentWriteOperation +from git.utils import SHA1Writer, LazyMixin, ConcurrentWriteOperation, join_path_native class _TemporaryFileSwap(object): @@ -260,7 +260,7 @@ class IndexFile(LazyMixin, diff.Diffable): super(IndexFile, self)._set_cache_(attr) def _index_path(self): - return os.path.join(self.repo.path, "index") + return join_path_native(self.repo.path, "index") @property @@ -440,7 +440,7 @@ class IndexFile(LazyMixin, diff.Diffable): # as it considers existing entries. moving it essentially clears the index. # Unfortunately there is no 'soft' way to do it. # The _TemporaryFileSwap assure the original file get put back - index_handler = _TemporaryFileSwap(os.path.join(repo.path, 'index')) + index_handler = _TemporaryFileSwap(join_path_native(repo.path, 'index')) try: repo.git.read_tree(*arg_list, **kwargs) index = cls(repo, tmp_index) diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index b0989a43..c66263c0 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -4,7 +4,7 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php import os -from git.utils import LazyMixin +from git.utils import LazyMixin, join_path_native import utils _assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" @@ -209,7 +209,9 @@ class IndexObject(Object): """ Returns Absolute path to this index object in the file system ( as opposed to the - .path field which is a path relative to the git repository ) + .path field which is a path relative to the git repository ). + + The returned path will be native to the system and contains '\' on windows. """ - return os.path.join(self.repo.git.git_dir, self.path) + return join_path_native(self.repo.git.git_dir, self.path) diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index 27bd84d0..f88096b3 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -9,6 +9,7 @@ import blob import base import binascii import git.diff as diff +from git.utils import join_path def sha_to_hex(sha): """Takes a string and returns the hex of the sha within""" @@ -110,7 +111,7 @@ class Tree(base.IndexObject, diff.Diffable): i += 1 # END while not reached NULL name = data[ns:i] - path = os.path.join(self.path, name) + path = join_path(self.path, name) # byte is NULL, get next 20 i += 1 diff --git a/lib/git/refs.py b/lib/git/refs.py index 5b94ea07..b0996878 100644 --- a/lib/git/refs.py +++ b/lib/git/refs.py @@ -8,7 +8,7 @@ import os from objects import Object, Commit from objects.utils import get_object_type_by_name -from utils import LazyMixin, Iterable +from utils import LazyMixin, Iterable, join_path, join_path_native, to_native_path_linux class Reference(LazyMixin, Iterable): """ @@ -136,15 +136,15 @@ class Reference(LazyMixin, Iterable): # walk loose refs # Currently we do not follow links - for root, dirs, files in os.walk(os.path.join(repo.path, common_path)): + for root, dirs, files in os.walk(join_path_native(repo.path, common_path)): for f in files: - abs_path = os.path.join(root, f) - rela_paths.add(abs_path.replace(repo.path + '/', "")) + abs_path = to_native_path_linux(join_path(root, f)) + rela_paths.add(abs_path.replace(to_native_path_linux(repo.path) + '/', "")) # END for each file in root directory # END for each directory to walk # read packed refs - packed_refs_path = os.path.join(repo.path, 'packed-refs') + packed_refs_path = join_path_native(repo.path, 'packed-refs') if os.path.isfile(packed_refs_path): fp = open(packed_refs_path, 'r') try: @@ -230,7 +230,7 @@ class SymbolicReference(object): return hash(self.name) def _get_path(self): - return os.path.join(self.repo.path, self.name) + return join_path_native(self.repo.path, self.name) def _get_commit(self): """ diff --git a/lib/git/utils.py b/lib/git/utils.py index 48427ff2..4397fbb2 100644 --- a/lib/git/utils.py +++ b/lib/git/utils.py @@ -9,55 +9,88 @@ import sys import tempfile try: - import hashlib + import hashlib except ImportError: - import sha + import sha def make_sha(source=''): - """ - A python2.4 workaround for the sha/hashlib module fiasco - + """ + A python2.4 workaround for the sha/hashlib module fiasco + Note From the dulwich project """ - try: - return hashlib.sha1(source) - except NameError: - sha1 = sha.sha(source) - return sha1 + try: + return hashlib.sha1(source) + except NameError: + sha1 = sha.sha(source) + return sha1 + +def join_path(a, *p): + """Join path tokens together similar to os.path.join, but always use + '/' instead of possibly '\' on windows.""" + path = a + for b in p: + if b.startswith('/'): + path += b[1:] + elif path == '' or path.endswith('/'): + path += b + else: + path += '/' + b + return path + +def to_native_path_windows(path): + return path.replace('/','\\') + +def to_native_path_linux(path): + return path.replace('\\','/') + +if sys.platform.startswith('win'): + to_native_path = to_native_path_windows +else: + # no need for any work on linux + def to_native_path_linux(path): + return path + to_native_path = to_native_path_linux + +def join_path_native(a, *p): + """As join path, but makes sure an OS native path is returned. This is only + needed to play it safe on my dear windows and to assure nice paths that only + use '\'""" + return to_native_path(join_path(a, *p)) class SHA1Writer(object): - """ - Wrapper around a file-like object that remembers the SHA1 of - the data written to it. It will write a sha when the stream is closed - or if the asked for explicitly usign write_sha. - - Note: - Based on the dulwich project - """ - __slots__ = ("f", "sha1") - - def __init__(self, f): - self.f = f - self.sha1 = make_sha("") + """ + Wrapper around a file-like object that remembers the SHA1 of + the data written to it. It will write a sha when the stream is closed + or if the asked for explicitly usign write_sha. + + Note: + Based on the dulwich project + """ + __slots__ = ("f", "sha1") + + def __init__(self, f): + self.f = f + self.sha1 = make_sha("") - def write(self, data): - self.sha1.update(data) - self.f.write(data) + def write(self, data): + self.sha1.update(data) + self.f.write(data) - def write_sha(self): - sha = self.sha1.digest() - self.f.write(sha) - return sha + def write_sha(self): + sha = self.sha1.digest() + self.f.write(sha) + return sha - def close(self): - sha = self.write_sha() - self.f.close() - return sha + def close(self): + sha = self.write_sha() + self.f.close() + return sha - def tell(self): - return self.f.tell() + def tell(self): + return self.f.tell() class LockFile(object): -- cgit v1.2.1 From 5593dc014a41c563ba524b9303e75da46ee96bd5 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 5 Nov 2009 18:22:29 +0100 Subject: Fixed windows TASKKILL so it actually does something *silently* --- lib/git/cmd.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib/git') diff --git a/lib/git/cmd.py b/lib/git/cmd.py index fb6f2998..ab34fa58 100644 --- a/lib/git/cmd.py +++ b/lib/git/cmd.py @@ -63,7 +63,10 @@ class Git(object): os.kill(self.proc.pid, 2) # interrupt signal except AttributeError: # try windows - subprocess.call(("TASKKILL", "/T", "/PID", self.proc.pid)) + # for some reason, providing None for stdout/stderr still prints something. This is why + # we simply use the shell and redirect to nul. Its slower than CreateProcess, question + # is whether we really want to see all these messages. Its annoying no matter what. + subprocess.call(("TASKKILL /F /T /PID %s > nul" % str(self.proc.pid)), shell=True) # END exception handling def __getattr__(self, attr): -- cgit v1.2.1 From a67bf61b5017e41740e997147dc88282b09c6f86 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 5 Nov 2009 18:27:21 +0100 Subject: git cmd on windows now runs without the shell, see diff for explanation --- lib/git/cmd.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'lib/git') diff --git a/lib/git/cmd.py b/lib/git/cmd.py index ab34fa58..97d6613d 100644 --- a/lib/git/cmd.py +++ b/lib/git/cmd.py @@ -17,8 +17,13 @@ execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output', 'output_stream' ) extra = {} -if sys.platform == 'win32': - extra = {'shell': True} +# NOTE: Execution through a shell appears to be slightly faster, but in fact +# I consider it a problem whenever complex strings are passed and *interpreted* +# by the shell beforehand. This can cause great confusion and reduces compatability +# between the OS which is why the shell should not be used ( unless it does not work +# otherwise ) +#if sys.platform == 'win32': +# extra = {'shell': False} def dashify(string): return string.replace('_', '-') -- cgit v1.2.1 From 46c9a0df4403266a320059eaa66e7dce7b3d9ac4 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 5 Nov 2009 20:09:35 +0100 Subject: cmd: added clear_cache method now used by test repo decorators to be sure persistent commands are killed before trying to remove the directory. Unfortunately, it still claims someone has opened the file. handle.exe does not show anyone, so what is happening here ? Is it just a windows odity ? If nothing helps I could just keep the temp data, but lets do some more testing first --- lib/git/cmd.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'lib/git') diff --git a/lib/git/cmd.py b/lib/git/cmd.py index 97d6613d..6d712ca9 100644 --- a/lib/git/cmd.py +++ b/lib/git/cmd.py @@ -17,7 +17,7 @@ execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output', 'output_stream' ) extra = {} -# NOTE: Execution through a shell appears to be slightly faster, but in fact +# NOTE: Execution through a shell on windows appears to be slightly faster, but in fact # I consider it a problem whenever complex strings are passed and *interpreted* # by the shell beforehand. This can cause great confusion and reduces compatability # between the OS which is why the shell should not be used ( unless it does not work @@ -401,3 +401,16 @@ class Git(object): cmd.stdout.read(1) # finishing newlines return (hexsha, typename, size, data) + + def clear_cache(self): + """ + Clear all kinds of internal caches to release resources. + + Currently persistent commands will be interrupted. + + Returns + self + """ + self.cat_file_all = None + self.cat_file_header = None + return self -- cgit v1.2.1 From d9671e15703918048982c9ff4e2e0fef21ede320 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 5 Nov 2009 20:31:40 +0100 Subject: fixed test_repo to work on windows cmd: taskkill now pipes stderror to nul as well --- lib/git/cmd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/git') diff --git a/lib/git/cmd.py b/lib/git/cmd.py index 6d712ca9..bccfb611 100644 --- a/lib/git/cmd.py +++ b/lib/git/cmd.py @@ -71,7 +71,7 @@ class Git(object): # for some reason, providing None for stdout/stderr still prints something. This is why # we simply use the shell and redirect to nul. Its slower than CreateProcess, question # is whether we really want to see all these messages. Its annoying no matter what. - subprocess.call(("TASKKILL /F /T /PID %s > nul" % str(self.proc.pid)), shell=True) + subprocess.call(("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(self.proc.pid)), shell=True) # END exception handling def __getattr__(self, attr): -- cgit v1.2.1 From 4e99d9ab2acfaf2ebb4b150736590d6a4e33f449 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 5 Nov 2009 20:36:05 +0100 Subject: Fixed config module which forgot to call the superclass's initializer, finally causing failure in python 2.6 --- lib/git/config.py | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/git') diff --git a/lib/git/config.py b/lib/git/config.py index ccfbae48..bf4c6469 100644 --- a/lib/git/config.py +++ b/lib/git/config.py @@ -116,6 +116,7 @@ class GitConfigParser(cp.RawConfigParser, LockFile): If True, the ConfigParser may only read the data , but not change it. If False, only a single file path or file object may be given. """ + super(GitConfigParser, self).__init__() # initialize base with ordered dictionaries to be sure we write the same # file back self._sections = OrderedDict() -- cgit v1.2.1 From 555b0efc2c19aa8cf7c548b4097bd20a73f572ca Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 5 Nov 2009 21:20:10 +0100 Subject: repo.clone: Added plenty of special handling to allow drive letters to work as expected. Its quite terrible to see a two-line method inflate to 20 as there is no git-daemon on windows, some tests will not work. The error message has been adjusted to be more precise for the poor people trying to run the tests on windows ( including myself ) --- lib/git/repo.py | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'lib/git') diff --git a/lib/git/repo.py b/lib/git/repo.py index 6d388633..d39f11d1 100644 --- a/lib/git/repo.py +++ b/lib/git/repo.py @@ -674,7 +674,38 @@ class Repo(object): Returns ``git.Repo`` (the newly cloned repo) """ - self.git.clone(self.path, path, **kwargs) + # special handling for windows for path at which the clone should be + # created. + # tilde '~' will be expanded to the HOME no matter where the ~ occours. Hence + # we at least give a proper error instead of letting git fail + prev_cwd = None + prev_path = None + if os.name == 'nt': + if '~' in path: + raise OSError("Git cannot handle the ~ character in path %r correctly" % path) + + # on windows, git will think paths like c: are relative and prepend the + # current working dir ( before it fails ). We temporarily adjust the working + # dir to make this actually work + match = re.match("(\w:[/\\\])(.*)", path) + if match: + prev_cwd = os.getcwd() + prev_path = path + drive, rest_of_path = match.groups() + os.chdir(drive) + path = rest_of_path + kwargs['with_keep_cwd'] = True + # END cwd preparation + # END windows handling + + try: + self.git.clone(self.path, path, **kwargs) + finally: + if prev_cwd is not None: + os.chdir(prev_cwd) + path = prev_path + # END reset previous working dir + # END bad windows handling return Repo(path) -- cgit v1.2.1 From e0f2bb42b56f770c50a6ef087e9049fa6ac11fb5 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 5 Nov 2009 23:15:01 +0100 Subject: ARGH: wb and rb is not the same as r and w on windows, hence reading of binary files went crazy as well as binary writing --- lib/git/index.py | 18 ++++++++++++------ lib/git/utils.py | 6 +++--- 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'lib/git') diff --git a/lib/git/index.py b/lib/git/index.py index 747ccc88..45bf617f 100644 --- a/lib/git/index.py +++ b/lib/git/index.py @@ -36,7 +36,7 @@ class _TemporaryFileSwap(object): def __del__(self): if os.path.isfile(self.tmp_file_path): - if sys.platform == "win32" and os.path.exists(self.file_path): + if os.name == 'nt' and os.path.exists(self.file_path): os.remove(self.file_path) os.rename(self.tmp_file_path, self.file_path) # END temp file exists @@ -243,9 +243,10 @@ class IndexFile(LazyMixin, diff.Diffable): if attr == "entries": # read the current index # try memory map for speed - fp = open(self._file_path, "r") + fp = open(self._file_path, "rb") stream = fp try: + raise Exception() stream = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ) except Exception: pass @@ -254,7 +255,12 @@ class IndexFile(LazyMixin, diff.Diffable): try: self._read_from_stream(stream) finally: - fp.close() + pass + # make sure we close the stream ( possibly an mmap ) + # and the file + #stream.close() + #if stream is not fp: + # fp.close() # END read from default index on demand else: super(IndexFile, self)._set_cache_(attr) @@ -603,7 +609,7 @@ class IndexFile(LazyMixin, diff.Diffable): """ if not os.path.isabs(path): return path - relative_path = path.replace(self.repo.git.git_dir+"/", "") + relative_path = path.replace(self.repo.git.git_dir+os.sep, "") if relative_path == path: raise ValueError("Absolute path %r is not in git repository at %r" % (path,self.repo.git.git_dir)) return relative_path @@ -839,10 +845,10 @@ class IndexFile(LazyMixin, diff.Diffable): # create message stream tmp_file_path = tempfile.mktemp() - fp = open(tmp_file_path,"w") + fp = open(tmp_file_path,"wb") fp.write(str(message)) fp.close() - fp = open(tmp_file_path,"r") + fp = open(tmp_file_path,"rb") fp.seek(0) try: diff --git a/lib/git/utils.py b/lib/git/utils.py index 4397fbb2..5deed556 100644 --- a/lib/git/utils.py +++ b/lib/git/utils.py @@ -131,7 +131,7 @@ class LockFile(object): lock_file = self._lock_file_path() try: - fp = open(lock_file, "r") + fp = open(lock_file, "rb") pid = int(fp.read()) fp.close() except IOError: @@ -156,7 +156,7 @@ class LockFile(object): if os.path.exists(lock_file): raise IOError("Lock for file %r did already exist, delete %r in case the lock is illegal" % (self._file_path, lock_file)) - fp = open(lock_file, "w") + fp = open(lock_file, "wb") fp.write(str(os.getpid())) fp.close() @@ -212,7 +212,7 @@ class ConcurrentWriteOperation(LockFile): self._obtain_lock_or_raise() dirname, basename = os.path.split(self._file_path) - self._temp_write_fp = open(tempfile.mktemp(basename, '', dirname), "w") + self._temp_write_fp = open(tempfile.mktemp(basename, '', dirname), "wb") return self._temp_write_fp def _is_writing(self): -- cgit v1.2.1 From 4114d19e2c02f3ffca9feb07b40d9475f36604cc Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 6 Nov 2009 11:09:14 +0100 Subject: Fixed commit.count method which now handles the paths case properly. It appears git-rev-list uses empty paths in some way, which is quite hard to specify on a shell, but easy if the process is spawned directly --- lib/git/objects/commit.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'lib/git') diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 80b3ad23..4ec806fb 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -116,7 +116,13 @@ class Commit(base.Object, Iterable, diff.Diffable): Returns int """ - return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).strip().splitlines()) + # yes, it makes a difference whether empty paths are given or not in our case + # as the empty paths version will ignore merge commits for some reason. + if paths: + return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).splitlines()) + else: + return len(self.repo.git.rev_list(self.sha, **kwargs).splitlines()) + @property def name_rev(self): -- cgit v1.2.1