From 6e5aae2fc8c3832bdae1cd5e0a269405fb059231 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 23 Nov 2010 12:35:34 +0100 Subject: Initial interface including some of the implementation of the RefLog. TestCase scetched out for now tests: Added tests to verify that objects don't have a dict. Previously, due to a missing __slots__ member in Serializable, most objects would indeed have a dict, although the opposite was intended --- refs/log.py | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 161 insertions(+) create mode 100644 refs/log.py (limited to 'refs/log.py') diff --git a/refs/log.py b/refs/log.py new file mode 100644 index 00000000..f67bea4d --- /dev/null +++ b/refs/log.py @@ -0,0 +1,161 @@ +from head import Head +from git.util import join_path +from gitdb.util import ( + join, + file_contents_ro_filepath + ) + +from git.objects.util import ( + Actor, + parse_actor_and_date, + Serializable, + utctz_to_altz, + altz_to_utctz_str, + ) + +import os + + +__all__ = ["RefLog", "RefLogEntry"] + + +class RefLogEntry(tuple): + """Named tuple allowing easy access to the revlog data fields""" + _fmt = "%s %s %s <%s> %i %s\t%s" + __slots__ = tuple() + + def __repr__(self): + """Representation of ourselves in git reflog format""" + act = self.actor + time = self.time + return self._fmt % (self.oldhexsha, self.newhexsha, act.name, act.email, + time[0], altz_to_utctz_str(time[1]), self.message) + + @property + def oldhexsha(self): + """The hexsha to the commit the ref pointed to before the change""" + return self[0] + + @property + def newhexsha(self): + """The hexsha to the commit the ref now points to, after the change""" + return self[1] + + @property + def actor(self): + """Actor instance, providing access""" + return self[2] + + @property + def time(self): + """time as tuple: + + * [0] = int(time) + * [1] = int(timezone_offset) in time.altzone format """ + return self[3] + + @property + def message(self): + """Message describing the operation that acted on the reference""" + return self[4] + + @classmethod + def new(self, oldhexsha, newhexsha, actor, time, tz_offset, message): + """:return: New instance of a RefLogEntry""" + if not isinstance(actor, Actor): + raise ValueError("Need actor instance, got %s" % actor) + # END check types + return RefLogEntry((oldhexsha, newhexsha, actor, (time, tz_offset), message)) + + @classmethod + def from_line(self, line): + """:return: New RefLogEntry instance from the given revlog line. + :param line: line without trailing newline + :raise ValueError: If line could not be parsed""" + raise NotImplementedError("todo") + + +class RefLog(list, Serializable): + """A reflog contains reflog entries, each of which defines a certain state + of the head in question. Custom query methods allow to retrieve log entries + by date or by other criteria. + + Reflog entries are orded, the first added entry is first in the list, the last + entry, i.e. the last change of the head or reference, is last in the list.""" + + __slots__ = tuple() + + #{ Interface + + @classmethod + def from_file(cls, filepath): + """ + :return: a new RefLog instance containing all entries from the reflog + at the given filepath + :param filepath: path to reflog + :raise ValueError: If the file could not be read or was corrupted in some way""" + inst = cls() + fmap = file_contents_ro_filepath(filepath, stream=False, allow_mmap=True) + try: + inst._deserialize(fmap) + finally: + fmap.close() + #END handle closing of handle + return inst + + @classmethod + def reflog_path(cls, ref): + """ + :return: string to absolute path at which the reflog of the given ref + instance would be found. The path is not guaranteed to point to a valid + file though. + :param ref: SymbolicReference instance""" + return join(ref.repo.git_dir, "logs", ref.path) + + @classmethod + def iter_entries(cls, stream): + """ + :return: Iterator yielding RefLogEntry instances, one for each line read + sfrom the given stream. + :param stream: file-like object containing the revlog in its native format + or basestring instance pointing to a file to read""" + new_entry = RefLogEntry.from_line + if isinstance(stream, basestring): + stream = file_contents_ro_filepath(stream) + #END handle stream type + return (new_entry(line.strip()) for line in stream) + + def to_file(self, filepath): + """Write the contents of the reflog instance to a file at the given filepath. + :param filepath: path to file, parent directories are assumed to exist""" + fp = open(filepath, 'wb') + try: + self._serialize(fp) + finally: + fp.close() + #END handle file streams + + #} END interface + + #{ Serializable Interface + def _serialize(self, stream): + lm1 = len(self) - 1 + write = stream.write() + + # write all entries + for i, e in self: + s = repr(e) + if i != lm1: + s += "\n" + #END handle line separator + write(s) + #END for each entry + + def _deserialize(self, stream): + new_entry = RefLogEntry.from_line + append = self.append + # NOTE: should use iter_entries, but this way it will be more direct and faster + for line in stream: + append(new_entry(line.strip())) + #END handle deserializatoin + #} END serializable interface -- cgit v1.2.1 From a93eb7e8484e5bb40f9b8d11ac64a1621cf4c9cd Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 23 Nov 2010 15:49:29 +0100 Subject: Implemented reflog reading and writing --- refs/log.py | 60 +++++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 21 deletions(-) (limited to 'refs/log.py') diff --git a/refs/log.py b/refs/log.py index f67bea4d..e7f18c6a 100644 --- a/refs/log.py +++ b/refs/log.py @@ -7,21 +7,22 @@ from gitdb.util import ( from git.objects.util import ( Actor, - parse_actor_and_date, + parse_date, Serializable, utctz_to_altz, altz_to_utctz_str, ) import os - +import re __all__ = ["RefLog", "RefLogEntry"] class RefLogEntry(tuple): """Named tuple allowing easy access to the revlog data fields""" - _fmt = "%s %s %s <%s> %i %s\t%s" + _fmt = "%s %s %s <%s> %i %s\t%s\n" + _re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') __slots__ = tuple() def __repr__(self): @@ -68,13 +69,34 @@ class RefLogEntry(tuple): return RefLogEntry((oldhexsha, newhexsha, actor, (time, tz_offset), message)) @classmethod - def from_line(self, line): + def from_line(cls, line): """:return: New RefLogEntry instance from the given revlog line. :param line: line without trailing newline :raise ValueError: If line could not be parsed""" - raise NotImplementedError("todo") + try: + info, msg = line.split('\t', 2) + except ValueError: + raise ValueError("line is missing tab separator") + #END handle first plit + oldhexsha = info[:40] + newhexsha = info[41:81] + for hexsha in (oldhexsha, newhexsha): + if not cls._re_hexsha_only.match(hexsha): + raise ValueError("Invalid hexsha: %s" % hexsha) + # END if hexsha re doesn't match + #END for each hexsha + + email_end = info.find('>', 82) + if email_end == -1: + raise ValueError("Missing token: >") + #END handle missing end brace + + actor = Actor._from_string(info[82:email_end+1]) + time, tz_offset = parse_date(info[email_end+2:]) + + return RefLogEntry((oldhexsha, newhexsha, actor, (time, tz_offset), msg)) + - class RefLog(list, Serializable): """A reflog contains reflog entries, each of which defines a certain state of the head in question. Custom query methods allow to retrieve log entries @@ -104,7 +126,7 @@ class RefLog(list, Serializable): return inst @classmethod - def reflog_path(cls, ref): + def path(cls, ref): """ :return: string to absolute path at which the reflog of the given ref instance would be found. The path is not guaranteed to point to a valid @@ -123,7 +145,12 @@ class RefLog(list, Serializable): if isinstance(stream, basestring): stream = file_contents_ro_filepath(stream) #END handle stream type - return (new_entry(line.strip()) for line in stream) + while True: + line = stream.readline() + if not line: + return + yield new_entry(line.strip()) + #END endless loop def to_file(self, filepath): """Write the contents of the reflog instance to a file at the given filepath. @@ -140,22 +167,13 @@ class RefLog(list, Serializable): #{ Serializable Interface def _serialize(self, stream): lm1 = len(self) - 1 - write = stream.write() + write = stream.write # write all entries - for i, e in self: - s = repr(e) - if i != lm1: - s += "\n" - #END handle line separator - write(s) + for i, e in enumerate(self): + write(repr(e)) #END for each entry def _deserialize(self, stream): - new_entry = RefLogEntry.from_line - append = self.append - # NOTE: should use iter_entries, but this way it will be more direct and faster - for line in stream: - append(new_entry(line.strip())) - #END handle deserializatoin + self.extend(self.iter_entries(stream)) #} END serializable interface -- cgit v1.2.1 From 8ad01ee239f9111133e52af29b78daed34c52e49 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 23 Nov 2010 15:58:32 +0100 Subject: SymbolicReference: log method added, including test --- refs/log.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'refs/log.py') diff --git a/refs/log.py b/refs/log.py index e7f18c6a..1d07ef9a 100644 --- a/refs/log.py +++ b/refs/log.py @@ -1,4 +1,3 @@ -from head import Head from git.util import join_path from gitdb.util import ( join, @@ -170,7 +169,7 @@ class RefLog(list, Serializable): write = stream.write # write all entries - for i, e in enumerate(self): + for e in self: write(repr(e)) #END for each entry -- cgit v1.2.1 From a21a9f6f13861ddc65671b278e93cf0984adaa30 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 23 Nov 2010 21:14:59 +0100 Subject: Actor: Moved it from git.objects.util to git.util, adjusted all imports accordingly. Added methods to Actor to retrieve the global committer and author information Reflog: implemented and tested append_entry method --- refs/log.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 11 deletions(-) (limited to 'refs/log.py') diff --git a/refs/log.py b/refs/log.py index 1d07ef9a..8cb0a5ab 100644 --- a/refs/log.py +++ b/refs/log.py @@ -1,17 +1,22 @@ -from git.util import join_path +from git.util import ( + join_path, + Actor, + ) + from gitdb.util import ( + bin_to_hex, join, file_contents_ro_filepath ) from git.objects.util import ( - Actor, parse_date, Serializable, utctz_to_altz, altz_to_utctz_str, ) +import time import os import re @@ -104,7 +109,28 @@ class RefLog(list, Serializable): Reflog entries are orded, the first added entry is first in the list, the last entry, i.e. the last change of the head or reference, is last in the list.""" - __slots__ = tuple() + __slots__ = ('_path', ) + + def __new__(cls, filepath=None): + inst = super(RefLog, cls).__new__(cls) + return inst + + def __init__(self, filepath=None): + """Initialize this instance with an optional filepath, from which we will + initialize our data. The path is also used to write changes back using + the write() method""" + self._path = filepath + if filepath is not None: + self._read_from_file() + # END handle filepath + + def _read_from_file(self): + fmap = file_contents_ro_filepath(self._path, stream=False, allow_mmap=True) + try: + self._deserialize(fmap) + finally: + fmap.close() + #END handle closing of handle #{ Interface @@ -115,14 +141,7 @@ class RefLog(list, Serializable): at the given filepath :param filepath: path to reflog :raise ValueError: If the file could not be read or was corrupted in some way""" - inst = cls() - fmap = file_contents_ro_filepath(filepath, stream=False, allow_mmap=True) - try: - inst._deserialize(fmap) - finally: - fmap.close() - #END handle closing of handle - return inst + return cls(filepath) @classmethod def path(cls, ref): @@ -154,12 +173,35 @@ class RefLog(list, Serializable): def to_file(self, filepath): """Write the contents of the reflog instance to a file at the given filepath. :param filepath: path to file, parent directories are assumed to exist""" + # TODO: Use locked fd fp = open(filepath, 'wb') try: self._serialize(fp) finally: fp.close() #END handle file streams + + def append_entry(self, oldbinsha, newbinsha, message, write=True): + """Append a new log entry to the revlog, changing it in place. + :param oldbinsha: binary sha of the previous commit + :param newbinsha: binary sha of the current commit + :param message: message describing the change to the reference + :param write: If True, the changes will be written right away. Otherwise + the change will not be written + :return: RefLogEntry objects which was appended to the log""" + if len(oldbinsha) != 20 or len(newbinsha) != 20: + raise ValueError("Shas need to be given in binary format") + #END handle sha type + entry = RefLogEntry((bin_to_hex(oldbinsha), bin_to_hex(newbinsha), Actor.committer(), (int(time.time()), time.altzone), message)) + self.append(entry) + if write: + self.write() + #END handle auto-write + return entry + + def write(self): + """Write this instance's data to the file we are originating from""" + return self.to_file(self._path) #} END interface -- cgit v1.2.1 From 61f3db7bd07ac2f3c2ff54615c13bf9219289932 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 23 Nov 2010 22:47:34 +0100 Subject: Removed ORIG_HEAD handling which was downright wrong. ORIG_HEAD gets only set during merge and rebase, and probably everything that changes the ref more drastically. Probably I have to reread that. What needs to be adjusted though is the reflog --- refs/log.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'refs/log.py') diff --git a/refs/log.py b/refs/log.py index 8cb0a5ab..c2799f79 100644 --- a/refs/log.py +++ b/refs/log.py @@ -1,6 +1,7 @@ from git.util import ( join_path, Actor, + LockedFD, ) from gitdb.util import ( @@ -173,13 +174,16 @@ class RefLog(list, Serializable): def to_file(self, filepath): """Write the contents of the reflog instance to a file at the given filepath. :param filepath: path to file, parent directories are assumed to exist""" - # TODO: Use locked fd - fp = open(filepath, 'wb') + lfd = LockedFD(filepath) + fp = lfd.open(write=True, stream=True) try: self._serialize(fp) - finally: - fp.close() - #END handle file streams + lfd.commit() + except: + # on failure it rolls back automatically, but we make it clear + lfd.rollback() + raise + #END handle change def append_entry(self, oldbinsha, newbinsha, message, write=True): """Append a new log entry to the revlog, changing it in place. -- cgit v1.2.1 From 7029773512eee5a0bb765b82cfdd90fd5ab34e15 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 23 Nov 2010 23:20:11 +0100 Subject: Implemented revlog.append_entry as classmethod, to assure we will always actually write_append the new entry, instead of rewriting the whole file. Added file-locking and directory handling, so the implementation should be similar (enough) to the git reference implementation. Next up is to implement a way to update the reflog when changing references, which is going to be a little more complicated --- refs/log.py | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) (limited to 'refs/log.py') diff --git a/refs/log.py b/refs/log.py index c2799f79..9728911a 100644 --- a/refs/log.py +++ b/refs/log.py @@ -2,12 +2,15 @@ from git.util import ( join_path, Actor, LockedFD, + LockFile, + assure_directory_exists, + to_native_path, ) from gitdb.util import ( bin_to_hex, join, - file_contents_ro_filepath + file_contents_ro_filepath, ) from git.objects.util import ( @@ -151,7 +154,7 @@ class RefLog(list, Serializable): instance would be found. The path is not guaranteed to point to a valid file though. :param ref: SymbolicReference instance""" - return join(ref.repo.git_dir, "logs", ref.path) + return join(ref.repo.git_dir, "logs", to_native_path(ref.path)) @classmethod def iter_entries(cls, stream): @@ -175,6 +178,8 @@ class RefLog(list, Serializable): """Write the contents of the reflog instance to a file at the given filepath. :param filepath: path to file, parent directories are assumed to exist""" lfd = LockedFD(filepath) + assure_directory_exists(filepath, is_file=True) + fp = lfd.open(write=True, stream=True) try: self._serialize(fp) @@ -185,22 +190,34 @@ class RefLog(list, Serializable): raise #END handle change - def append_entry(self, oldbinsha, newbinsha, message, write=True): - """Append a new log entry to the revlog, changing it in place. + @classmethod + def append_entry(cls, filepath, oldbinsha, newbinsha, message): + """Append a new log entry to the revlog at filepath. :param oldbinsha: binary sha of the previous commit :param newbinsha: binary sha of the current commit :param message: message describing the change to the reference :param write: If True, the changes will be written right away. Otherwise the change will not be written - :return: RefLogEntry objects which was appended to the log""" + :return: RefLogEntry objects which was appended to the log + :note: As we are append-only, concurrent access is not a problem as we + do not interfere with readers.""" if len(oldbinsha) != 20 or len(newbinsha) != 20: raise ValueError("Shas need to be given in binary format") #END handle sha type - entry = RefLogEntry((bin_to_hex(oldbinsha), bin_to_hex(newbinsha), Actor.committer(), (int(time.time()), time.altzone), message)) - self.append(entry) - if write: - self.write() - #END handle auto-write + assure_directory_exists(filepath, is_file=True) + entry = RefLogEntry((bin_to_hex(oldbinsha), bin_to_hex(newbinsha), Actor.committer(), (int(time.time()), time.altzone), message)) + + lf = LockFile(filepath) + lf._obtain_lock_or_raise() + + fd = open(filepath, 'a') + try: + fd.write(repr(entry)) + finally: + fd.close() + lf._release_lock() + #END handle write operation + return entry def write(self): -- cgit v1.2.1 From ec0657cf5de9aeb5629cc4f4f38b36f48490493e Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 24 Nov 2010 15:56:49 +0100 Subject: Unified object and commit handling which should make the reflog handling much easier. There is some bug in it though, it still needs fixing --- refs/log.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'refs/log.py') diff --git a/refs/log.py b/refs/log.py index 9728911a..129803b4 100644 --- a/refs/log.py +++ b/refs/log.py @@ -191,8 +191,11 @@ class RefLog(list, Serializable): #END handle change @classmethod - def append_entry(cls, filepath, oldbinsha, newbinsha, message): - """Append a new log entry to the revlog at filepath. + def append_entry(cls, config_reader, filepath, oldbinsha, newbinsha, message): + """Append a new log entry to the revlog at filepath. + :param config_reader: configuration reader of the repository - used to obtain + user information. May be None + :param filepath: full path to the log file :param oldbinsha: binary sha of the previous commit :param newbinsha: binary sha of the current commit :param message: message describing the change to the reference @@ -205,7 +208,7 @@ class RefLog(list, Serializable): raise ValueError("Shas need to be given in binary format") #END handle sha type assure_directory_exists(filepath, is_file=True) - entry = RefLogEntry((bin_to_hex(oldbinsha), bin_to_hex(newbinsha), Actor.committer(), (int(time.time()), time.altzone), message)) + entry = RefLogEntry((bin_to_hex(oldbinsha), bin_to_hex(newbinsha), Actor.committer(config_reader), (int(time.time()), time.altzone), message)) lf = LockFile(filepath) lf._obtain_lock_or_raise() -- cgit v1.2.1 From 98a313305f0d554a179b93695d333199feb5266c Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 24 Nov 2010 19:36:34 +0100 Subject: RefLog: added entry_at method, which is a faster way of reading single entries, including test --- refs/log.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'refs/log.py') diff --git a/refs/log.py b/refs/log.py index 129803b4..6c734ad4 100644 --- a/refs/log.py +++ b/refs/log.py @@ -173,6 +173,37 @@ class RefLog(list, Serializable): return yield new_entry(line.strip()) #END endless loop + + @classmethod + def entry_at(cls, filepath, index): + """:return: RefLogEntry at the given index + :param filepath: full path to the index file from which to read the entry + :param index: python list compatible index, i.e. it may be negative to + specifiy an entry counted from the end of the list + + :raise IndexError: If the entry didn't exist + .. note:: This method is faster as it only parses the entry at index, skipping + all other lines. Nonetheless, the whole file has to be read if + the index is negative + """ + fp = open(filepath, 'rb') + if index < 0: + return RefLogEntry.from_line(fp.readlines()[index].strip()) + else: + # read until index is reached + for i in xrange(index+1): + line = fp.readline() + if not line: + break + #END abort on eof + #END handle runup + + if i != index or not line: + raise IndexError + #END handle exception + + return RefLogEntry.from_line(line.strip()) + #END handle index def to_file(self, filepath): """Write the contents of the reflog instance to a file at the given filepath. -- cgit v1.2.1 From 3203cd7629345d32806f470a308975076b2b4686 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 24 Nov 2010 19:48:44 +0100 Subject: Fixed doc strings, improved error checking on RefLog.write method --- refs/log.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'refs/log.py') diff --git a/refs/log.py b/refs/log.py index 6c734ad4..f49c07fd 100644 --- a/refs/log.py +++ b/refs/log.py @@ -182,6 +182,7 @@ class RefLog(list, Serializable): specifiy an entry counted from the end of the list :raise IndexError: If the entry didn't exist + .. note:: This method is faster as it only parses the entry at index, skipping all other lines. Nonetheless, the whole file has to be read if the index is negative @@ -224,6 +225,7 @@ class RefLog(list, Serializable): @classmethod def append_entry(cls, config_reader, filepath, oldbinsha, newbinsha, message): """Append a new log entry to the revlog at filepath. + :param config_reader: configuration reader of the repository - used to obtain user information. May be None :param filepath: full path to the log file @@ -255,8 +257,13 @@ class RefLog(list, Serializable): return entry def write(self): - """Write this instance's data to the file we are originating from""" - return self.to_file(self._path) + """Write this instance's data to the file we are originating from + :return: self""" + if self._path is None: + raise ValueError("Instance was not initialized with a path, use to_file(...) instead") + #END assert path + self.to_file(self._path) + return self #} END interface -- cgit v1.2.1