diff options
author | Gauvain Pocentek <gauvain@pocentek.net> | 2016-07-17 14:09:39 +0200 |
---|---|---|
committer | Gauvain Pocentek <gauvain@pocentek.net> | 2016-07-17 14:09:39 +0200 |
commit | 94aea524a23ac428259bae327a1fccdd2f5b841d (patch) | |
tree | f8c3c8a90f4baf8ef271f6df3572de7991bbe22c | |
parent | 8e6a9442324926ed1dec0a8bfaf77792e4bdb10f (diff) | |
download | gitlab-94aea524a23ac428259bae327a1fccdd2f5b841d.tar.gz |
Allow streaming downloads when appropriate
Some API calls may download large amounts of data, resulting in high
memory usage and out-of-memory errors. For these API calls, use the
streaming capabilities of requests and download the data in chunks. The caller is
responsible for providing a callable to actually store the data.
The default callable just prints the data to stdout.
-rw-r--r-- | docs/gl_objects/builds.py | 13 | ||||
-rw-r--r-- | docs/gl_objects/builds.rst | 14 | ||||
-rw-r--r-- | gitlab/__init__.py | 3 | ||||
-rw-r--r-- | gitlab/objects.py | 53 | ||||
-rw-r--r-- | gitlab/utils.py | 15 |
5 files changed, 75 insertions, 23 deletions
diff --git a/docs/gl_objects/builds.py b/docs/gl_objects/builds.py index 4b663c6..c535cb2 100644 --- a/docs/gl_objects/builds.py +++ b/docs/gl_objects/builds.py @@ -77,6 +77,19 @@ project.builds.get(build_id) build.artifacts() # end artifacts +# stream artifacts +class Foo(object): + def __init__(self): + self._fd = open('artifacts.zip', 'w') + + def __call__(self, chunk): + self._fd.write(chunk) + +target = Foo() +build.artifacts(streamed=True, action=target) +del(target) # flushes data on disk +# end stream artifacts + # keep artifacts build.keep_artifacts() # end keep artifacts diff --git a/docs/gl_objects/builds.rst b/docs/gl_objects/builds.rst index 23f47f0..ce4cc0e 100644 --- a/docs/gl_objects/builds.rst +++ b/docs/gl_objects/builds.rst @@ -116,7 +116,16 @@ Get a build artifacts: .. warning:: - Artifacts are entirely stored in memory. + Artifacts are entirely stored in memory in this example. + +.. _streaming_example: + +You can download artifacts as a stream. Provide a callable to handle the +stream: + +.. literalinclude:: builds.py + :start-after: # stream artifacts + :end-before: # end stream artifacts Mark a build artifact as kept when expiration is set: @@ -132,7 +141,8 @@ Get a build trace: .. warning:: - Traces are entirely stored in memory. + Traces are entirely stored in memory unless you use the streaming feature. + See :ref:`the artifacts example <streaming_example>`. 
Cancel/retry a build: diff --git a/gitlab/__init__.py b/gitlab/__init__.py index 6aa9c18..d702f31 100644 --- a/gitlab/__init__.py +++ b/gitlab/__init__.py @@ -286,7 +286,7 @@ class Gitlab(object): self.email = email self.password = password - def _raw_get(self, path, content_type=None, **kwargs): + def _raw_get(self, path, content_type=None, streamed=False, **kwargs): url = '%s%s' % (self._url, path) headers = self._create_headers(content_type) try: @@ -295,6 +295,7 @@ class Gitlab(object): headers=headers, verify=self.ssl_verify, timeout=self.timeout, + stream=streamed, auth=requests.auth.HTTPBasicAuth( self.http_username, self.http_password)) diff --git a/gitlab/objects.py b/gitlab/objects.py index 7d2b879..a8b92de 100644 --- a/gitlab/objects.py +++ b/gitlab/objects.py @@ -29,6 +29,7 @@ import six import gitlab from gitlab.exceptions import * # noqa +from gitlab import utils class jsonEncoder(json.JSONEncoder): @@ -889,22 +890,31 @@ class ProjectBuild(GitlabObject): r = self.gitlab._raw_post(url) raise_error_from_response(r, GitlabGetError, 200) - def artifacts(self, **kwargs): + def artifacts(self, streamed=False, action=None, chunk_size=1024, + **kwargs): """Get the build artifacts. + Args: + streamed (bool): If True the data will be processed by chunks of + `chunk_size` and each chunk is passed to `action` for + treatment. + action (callable): Callable responsible of dealing with chunk of + data. + chunk_size (int): Size of each chunk. + Returns: - str: The artifacts. + str: The artifacts if `streamed` is False, None otherwise. Raises: GitlabConnectionError: If the server cannot be reached. GitlabGetError: If the artifacts are not available. 
""" url = '/projects/%s/builds/%s/artifacts' % (self.project_id, self.id) - r = self.gitlab._raw_get(url) + r = self.gitlab._raw_get(url, streamed=streamed, **kwargs) raise_error_from_response(r, GitlabGetError, 200) - return r.content + return utils.response_content(r, streamed, action, chunk_size) - def trace(self, **kwargs): + def trace(self, streamed=False, action=None, chunk_size=1024, **kwargs): """Get the build trace. Returns: @@ -915,9 +925,9 @@ class ProjectBuild(GitlabObject): GitlabGetError: If the trace is not available. """ url = '/projects/%s/builds/%s/trace' % (self.project_id, self.id) - r = self.gitlab._raw_get(url) + r = self.gitlab._raw_get(url, streamed=streamed, **kwargs) raise_error_from_response(r, GitlabGetError, 200) - return r.content + return utils.response_content(r, streamed, action, chunk_size) class ProjectBuildManager(BaseManager): @@ -972,7 +982,8 @@ class ProjectCommit(GitlabObject): return r.json() - def blob(self, filepath, **kwargs): + def blob(self, filepath, streamed=False, action=None, chunk_size=1024, + **kwargs): """Generate the content of a file for this commit. Args: @@ -988,10 +999,9 @@ class ProjectCommit(GitlabObject): url = ('/projects/%(project_id)s/repository/blobs/%(commit_id)s' % {'project_id': self.project_id, 'commit_id': self.id}) url += '?filepath=%s' % filepath - r = self.gitlab._raw_get(url, **kwargs) + r = self.gitlab._raw_get(url, streamed=streamed, **kwargs) raise_error_from_response(r, GitlabGetError) - - return r.content + return utils.response_content(r, streamed, action, chunk_size) def builds(self, **kwargs): """List the build for this commit. @@ -1734,7 +1744,8 @@ class Project(GitlabObject): DeprecationWarning) return self.repository_blob(sha, filepath, **kwargs) - def repository_blob(self, sha, filepath, **kwargs): + def repository_blob(self, sha, filepath, streamed=False, action=None, + chunk_size=1024, **kwargs): """Return the content of a file for a commit. 
Args: @@ -1750,11 +1761,12 @@ class Project(GitlabObject): """ url = "/projects/%s/repository/blobs/%s" % (self.id, sha) url += '?filepath=%s' % (filepath) - r = self.gitlab._raw_get(url, **kwargs) + r = self.gitlab._raw_get(url, streamed=streamed, **kwargs) raise_error_from_response(r, GitlabGetError) - return r.content + return utils.response_content(r, streamed, action, chunk_size) - def repository_raw_blob(self, sha, **kwargs): + def repository_raw_blob(self, sha, streamed=False, action=None, + chunk_size=1024, **kwargs): """Returns the raw file contents for a blob by blob SHA. Args: @@ -1768,9 +1780,9 @@ class Project(GitlabObject): GitlabGetError: If the server fails to perform the request. """ url = "/projects/%s/repository/raw_blobs/%s" % (self.id, sha) - r = self.gitlab._raw_get(url, **kwargs) + r = self.gitlab._raw_get(url, streamed=streamed, **kwargs) raise_error_from_response(r, GitlabGetError) - return r.content + return utils.response_content(r, streamed, action, chunk_size) def repository_compare(self, from_, to, **kwargs): """Returns a diff between two branches/commits. @@ -1813,7 +1825,8 @@ class Project(GitlabObject): DeprecationWarning) return self.repository_archive(sha, **kwargs) - def repository_archive(self, sha=None, **kwargs): + def repository_archive(self, sha=None, streamed=False, action=None, + chunk_size=1024, **kwargs): """Return a tarball of the repository. 
Args: @@ -1829,9 +1842,9 @@ class Project(GitlabObject): url = '/projects/%s/repository/archive' % self.id if sha: url += '?sha=%s' % sha - r = self.gitlab._raw_get(url, **kwargs) + r = self.gitlab._raw_get(url, streamed=streamed, **kwargs) raise_error_from_response(r, GitlabGetError) - return r.content + return utils.response_content(r, streamed, action, chunk_size) def create_file(self, path, branch, content, message, **kwargs): """Creates file in project repository diff --git a/gitlab/utils.py b/gitlab/utils.py new file mode 100644 index 0000000..181ca20 --- /dev/null +++ b/gitlab/utils.py @@ -0,0 +1,15 @@ +class _StdoutStream(object): + def __call__(self, chunk): + print(chunk) + + +def response_content(response, streamed, action, chunk_size): + if streamed is False: + return response.content + + if action is None: + action = _StdoutStream() + + for chunk in response.iter_content(chunk_size=chunk_size): + if chunk: + action(chunk) |