summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorVicent Martí <vicent@github.com>2013-09-17 09:57:55 -0700
committerVicent Martí <vicent@github.com>2013-09-17 09:57:55 -0700
commitbb371b62e950e3307d3acf2f772495a60565d266 (patch)
treeb4479ec3ad261bcac13493ee3f5ad45d15dfdda2 /include
parent4581f9d8ab72e9b97817e1eaa7154bcec1c7f0b1 (diff)
parentf60ed4e6495b8bf68d0604335672e6f300330b3b (diff)
downloadlibgit2-bb371b62e950e3307d3acf2f772495a60565d266.tar.gz
Merge pull request #1847 from libgit2/filters-alternative
Alternative proposal for filter API
Diffstat (limited to 'include')
-rw-r--r--include/git2.h3
-rw-r--r--include/git2/blob.h32
-rw-r--r--include/git2/buffer.h112
-rw-r--r--include/git2/errors.h1
-rw-r--r--include/git2/filter.h142
-rw-r--r--include/git2/sys/filter.h292
6 files changed, 582 insertions, 0 deletions
diff --git a/include/git2.h b/include/git2.h
index e8638a830..73c11ad83 100644
--- a/include/git2.h
+++ b/include/git2.h
@@ -58,4 +58,7 @@
#include "git2/stash.h"
#include "git2/pathspec.h"
+#include "git2/buffer.h"
+#include "git2/filter.h"
+
#endif
diff --git a/include/git2/blob.h b/include/git2/blob.h
index 8fca48966..dcab4fbe0 100644
--- a/include/git2/blob.h
+++ b/include/git2/blob.h
@@ -11,6 +11,7 @@
#include "types.h"
#include "oid.h"
#include "object.h"
+#include "buffer.h"
/**
* @file git2/blob.h
@@ -96,6 +97,37 @@ GIT_EXTERN(const void *) git_blob_rawcontent(const git_blob *blob);
GIT_EXTERN(git_off_t) git_blob_rawsize(const git_blob *blob);
/**
+ * Get a buffer with the filtered content of a blob.
+ *
+ * This applies filters as if the blob was being checked out to the
+ * working directory under the specified filename. This may apply
+ * CRLF filtering or other types of changes depending on the file
+ * attributes set for the blob and the content detected in it.
+ *
+ * The output is written into a `git_buf` which the caller must free
+ * when done (via `git_buf_free`).
+ *
+ * If no filters need to be applied, then the `out` buffer will just be
+ * populated with a pointer to the raw content of the blob. In that case,
+ * be careful to *not* free the blob until done with the buffer. To keep
+ * the data detached from the blob, call `git_buf_grow` on the buffer
+ * with a `want_size` of 0 and the buffer will be reallocated to be
+ * detached from the blob.
+ *
+ * @param out The git_buf to be filled in
+ * @param blob Pointer to the blob
+ * @param as_path Path used for file attribute lookups, etc.
+ * @param check_for_binary_data Should this test if blob content contains
+ * NUL bytes / looks like binary data before applying filters?
+ * @return 0 on success or an error code
+ */
+GIT_EXTERN(int) git_blob_filtered_content(
+ git_buf *out,
+ git_blob *blob,
+ const char *as_path,
+ int check_for_binary_data);
+
+/**
* Read a file from the working folder of a repository
* and write it to the Object Database as a loose blob
*
diff --git a/include/git2/buffer.h b/include/git2/buffer.h
new file mode 100644
index 000000000..36a61e6c9
--- /dev/null
+++ b/include/git2/buffer.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#ifndef INCLUDE_git_buf_h__
+#define INCLUDE_git_buf_h__
+
+#include "common.h"
+
+/**
+ * @file git2/buffer.h
+ * @brief Buffer export structure
+ *
+ * @ingroup Git
+ * @{
+ */
+GIT_BEGIN_DECL
+
+/**
+ * A data buffer for exporting data from libgit2
+ *
+ * Sometimes libgit2 wants to return an allocated data buffer to the
+ * caller and have the caller take responsibility for freeing that memory.
+ * This can be awkward if the caller does not have easy access to the same
+ * allocation functions that libgit2 is using. In those cases, libgit2
+ * will fill in a `git_buf` and the caller can use `git_buf_free()` to
+ * release it when they are done.
+ *
+ * A `git_buf` may also be used for the caller to pass in a reference to
+ * a block of memory they hold. In this case, libgit2 will not resize or
+ * free the memory, but will read from it as needed.
+ *
+ * A `git_buf` is a public structure with three fields:
+ *
+ * - `ptr` points to the start of the allocated memory. If it is NULL,
+ * then the `git_buf` is considered empty and libgit2 will feel free
+ * to overwrite it with new data.
+ *
+ * - `size` holds the size (in bytes) of the data that is actually used.
+ *
+ * - `asize` holds the known total amount of allocated memory if the `ptr`
+ * was allocated by libgit2. It may be larger than `size`. If `ptr`
+ * was not allocated by libgit2 and should not be resized and/or freed,
+ * then `asize` will be set to zero.
+ *
+ * Some APIs may occasionally do something slightly unusual with a buffer,
+ * such as setting `ptr` to a value that was passed in by the user. In
+ * those cases, the behavior will be clearly documented by the API.
+ */
+typedef struct {
+ char *ptr;
+ size_t asize, size;
+} git_buf;
+
+/**
+ * Static initializer for git_buf from static buffer
+ */
+#define GIT_BUF_INIT_CONST(STR,LEN) { (char *)(STR), 0, (size_t)(LEN) }
+
+/**
+ * Free the memory referred to by the git_buf.
+ *
+ * Note that this does not free the `git_buf` itself, just the memory
+ * pointed to by `buffer->ptr`. This will not free the memory if it looks
+ * like it was not allocated internally, but it will clear the buffer back
+ * to the empty state.
+ *
+ * @param buffer The buffer to deallocate
+ */
+GIT_EXTERN(void) git_buf_free(git_buf *buffer);
+
+/**
+ * Resize the buffer allocation to make more space.
+ *
+ * This will attempt to grow the buffer to accomodate the target size.
+ *
+ * If the buffer refers to memory that was not allocated by libgit2 (i.e.
+ * the `asize` field is zero), then `ptr` will be replaced with a newly
+ * allocated block of data. Be careful so that memory allocated by the
+ * caller is not lost. As a special variant, if you pass `target_size` as
+ * 0 and the memory is not allocated by libgit2, this will allocate a new
+ * buffer of size `size` and copy the external data into it.
+ *
+ * Currently, this will never shrink a buffer, only expand it.
+ *
+ * If the allocation fails, this will return an error and the buffer will be
+ * marked as invalid for future operations, invaliding the contents.
+ *
+ * @param buffer The buffer to be resized; may or may not be allocated yet
+ * @param target_size The desired available size
+ * @return 0 on success, -1 on allocation failure
+ */
+GIT_EXTERN(int) git_buf_grow(git_buf *buffer, size_t target_size);
+
+/**
+ * Set buffer to a copy of some raw data.
+ *
+ * @param buffer The buffer to set
+ * @param data The data to copy into the buffer
+ * @param datalen The length of the data to copy into the buffer
+ * @return 0 on success, -1 on allocation failure
+ */
+GIT_EXTERN(int) git_buf_set(
+ git_buf *buffer, const void *data, size_t datalen);
+
+GIT_END_DECL
+
+/** @} */
+
+#endif
diff --git a/include/git2/errors.h b/include/git2/errors.h
index dc4486ade..a454ac956 100644
--- a/include/git2/errors.h
+++ b/include/git2/errors.h
@@ -68,6 +68,7 @@ typedef enum {
GITERR_FETCHHEAD,
GITERR_MERGE,
GITERR_SSH,
+ GITERR_FILTER,
} git_error_t;
/**
diff --git a/include/git2/filter.h b/include/git2/filter.h
new file mode 100644
index 000000000..f96b6766b
--- /dev/null
+++ b/include/git2/filter.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#ifndef INCLUDE_git_filter_h__
+#define INCLUDE_git_filter_h__
+
+#include "common.h"
+#include "types.h"
+#include "oid.h"
+#include "buffer.h"
+
+/**
+ * @file git2/filter.h
+ * @brief Git filter APIs
+ *
+ * @ingroup Git
+ * @{
+ */
+GIT_BEGIN_DECL
+
+/**
+ * Filters are applied in one of two directions: smudging - which is
+ * exporting a file from the Git object database to the working directory,
+ * and cleaning - which is importing a file from the working directory to
+ * the Git object database. These values control which direction of
+ * change is being applied.
+ */
+typedef enum {
+ GIT_FILTER_TO_WORKTREE = 0,
+ GIT_FILTER_SMUDGE = GIT_FILTER_TO_WORKTREE,
+ GIT_FILTER_TO_ODB = 1,
+ GIT_FILTER_CLEAN = GIT_FILTER_TO_ODB,
+} git_filter_mode_t;
+
+/**
+ * A filter that can transform file data
+ *
+ * This represents a filter that can be used to transform or even replace
+ * file data. Libgit2 includes one built in filter and it is possible to
+ * write your own (see git2/sys/filter.h for information on that).
+ *
+ * The two builtin filters are:
+ *
+ * * "crlf" which uses the complex rules with the "text", "eol", and
+ * "crlf" file attributes to decide how to convert between LF and CRLF
+ * line endings
+ * * "ident" which replaces "$Id$" in a blob with "$Id: <blob OID>$" upon
+ * checkout and replaced "$Id: <anything>$" with "$Id$" on checkin.
+ */
+typedef struct git_filter git_filter;
+
+/**
+ * List of filters to be applied
+ *
+ * This represents a list of filters to be applied to a file / blob. You
+ * can build the list with one call, apply it with another, and dispose it
+ * with a third. In typical usage, there are not many occasions where a
+ * git_filter_list is needed directly since the library will generally
+ * handle conversions for you, but it can be convenient to be able to
+ * build and apply the list sometimes.
+ */
+typedef struct git_filter_list git_filter_list;
+
+/**
+ * Load the filter list for a given path.
+ *
+ * This will return 0 (success) but set the output git_filter_list to NULL
+ * if no filters are requested for the given file.
+ *
+ * @param filters Output newly created git_filter_list (or NULL)
+ * @param repo Repository object that contains `path`
+ * @param blob The blob to which the filter will be applied (if known)
+ * @param path Relative path of the file to be filtered
+ * @param mode Filtering direction (WT->ODB or ODB->WT)
+ * @return 0 on success (which could still return NULL if no filters are
+ * needed for the requested file), <0 on error
+ */
+GIT_EXTERN(int) git_filter_list_load(
+ git_filter_list **filters,
+ git_repository *repo,
+ git_blob *blob, /* can be NULL */
+ const char *path,
+ git_filter_mode_t mode);
+
+/**
+ * Apply filter list to a data buffer.
+ *
+ * See `git2/buffer.h` for background on `git_buf` objects.
+ *
+ * If the `in` buffer holds data allocated by libgit2 (i.e. `in->asize` is
+ * not zero), then it will be overwritten when applying the filters. If
+ * not, then it will be left untouched.
+ *
+ * If there are no filters to apply (or `filters` is NULL), then the `out`
+ * buffer will reference the `in` buffer data (with `asize` set to zero)
+ * instead of allocating data. This keeps allocations to a minimum, but
+ * it means you have to be careful about freeing the `in` data since `out`
+ * may be pointing to it!
+ *
+ * @param out Buffer to store the result of the filtering
+ * @param filters A loaded git_filter_list (or NULL)
+ * @param in Buffer containing the data to filter
+ * @return 0 on success, an error code otherwise
+ */
+GIT_EXTERN(int) git_filter_list_apply_to_data(
+ git_buf *out,
+ git_filter_list *filters,
+ git_buf *in);
+
+/**
+ * Apply filter list to the contents of a file on disk
+ */
+GIT_EXTERN(int) git_filter_list_apply_to_file(
+ git_buf *out,
+ git_filter_list *filters,
+ git_repository *repo,
+ const char *path);
+
+/**
+ * Apply filter list to the contents of a blob
+ */
+GIT_EXTERN(int) git_filter_list_apply_to_blob(
+ git_buf *out,
+ git_filter_list *filters,
+ git_blob *blob);
+
+/**
+ * Free a git_filter_list
+ *
+ * @param filters A git_filter_list created by `git_filter_list_load`
+ */
+GIT_EXTERN(void) git_filter_list_free(git_filter_list *filters);
+
+
+GIT_END_DECL
+
+/** @} */
+
+#endif
diff --git a/include/git2/sys/filter.h b/include/git2/sys/filter.h
new file mode 100644
index 000000000..94ad3aed4
--- /dev/null
+++ b/include/git2/sys/filter.h
@@ -0,0 +1,292 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#ifndef INCLUDE_sys_git_filter_h__
+#define INCLUDE_sys_git_filter_h__
+
+#include "git2/filter.h"
+
+/**
+ * @file git2/sys/filter.h
+ * @brief Git filter backend and plugin routines
+ * @defgroup git_backend Git custom backend APIs
+ * @ingroup Git
+ * @{
+ */
+GIT_BEGIN_DECL
+
+/**
+ * Look up a filter by name
+ *
+ * @param name The name of the filter
+ * @return Pointer to the filter object or NULL if not found
+ */
+GIT_EXTERN(git_filter *) git_filter_lookup(const char *name);
+
+#define GIT_FILTER_CRLF "crlf"
+#define GIT_FILTER_IDENT "ident"
+
+/**
+ * This is priority that the internal CRLF filter will be registered with
+ */
+#define GIT_FILTER_CRLF_PRIORITY 0
+
+/**
+ * This is priority that the internal ident filter will be registered with
+ */
+#define GIT_FILTER_IDENT_PRIORITY 100
+
+/**
+ * This is priority to use with a custom filter to imitate a core Git
+ * filter driver, so that it will be run last on checkout and first on
+ * checkin. You do not have to use this, but it helps compatibility.
+ */
+#define GIT_FILTER_DRIVER_PRIORITY 200
+
+/**
+ * Create a new empty filter list
+ *
+ * Normally you won't use this because `git_filter_list_load` will create
+ * the filter list for you, but you can use this in combination with the
+ * `git_filter_lookup` and `git_filter_list_push` functions to assemble
+ * your own chains of filters.
+ */
+GIT_EXTERN(int) git_filter_list_new(
+ git_filter_list **out, git_repository *repo, git_filter_mode_t mode);
+
+/**
+ * Add a filter to a filter list with the given payload.
+ *
+ * Normally you won't have to do this because the filter list is created
+ * by calling the "check" function on registered filters when the filter
+ * attributes are set, but this does allow more direct manipulation of
+ * filter lists when desired.
+ *
+ * Note that normally the "check" function can set up a payload for the
+ * filter. Using this function, you can either pass in a payload if you
+ * know the expected payload format, or you can pass NULL. Some filters
+ * may fail with a NULL payload. Good luck!
+ */
+GIT_EXTERN(int) git_filter_list_push(
+ git_filter_list *fl, git_filter *filter, void *payload);
+
+/**
+ * Look up how many filters are in the list
+ *
+ * We will attempt to apply all of these filters to any data passed in,
+ * but note that the filter apply action still has the option of skipping
+ * data that is passed in (for example, the CRLF filter will skip data
+ * that appears to be binary).
+ *
+ * @param fl A filter list
+ * @return The number of filters in the list
+ */
+GIT_EXTERN(size_t) git_filter_list_length(const git_filter_list *fl);
+
+/**
+ * A filter source represents a file/blob to be processed
+ */
+typedef struct git_filter_source git_filter_source;
+
+/**
+ * Get the repository that the source data is coming from.
+ */
+GIT_EXTERN(git_repository *) git_filter_source_repo(const git_filter_source *src);
+
+/**
+ * Get the path that the source data is coming from.
+ */
+GIT_EXTERN(const char *) git_filter_source_path(const git_filter_source *src);
+
+/**
+ * Get the file mode of the source file
+ * If the mode is unknown, this will return 0
+ */
+GIT_EXTERN(uint16_t) git_filter_source_filemode(const git_filter_source *src);
+
+/**
+ * Get the OID of the source
+ * If the OID is unknown (often the case with GIT_FILTER_CLEAN) then
+ * this will return NULL.
+ */
+GIT_EXTERN(const git_oid *) git_filter_source_id(const git_filter_source *src);
+
+/**
+ * Get the git_filter_mode_t to be applied
+ */
+GIT_EXTERN(git_filter_mode_t) git_filter_source_mode(const git_filter_source *src);
+
+/*
+ * struct git_filter
+ *
+ * The filter lifecycle:
+ * - initialize - first use of filter
+ * - shutdown - filter removed/unregistered from system
+ * - check - considering filter for file
+ * - apply - apply filter to file contents
+ * - cleanup - done with file
+ */
+
+/**
+ * Initialize callback on filter
+ *
+ * Specified as `filter.initialize`, this is an optional callback invoked
+ * before a filter is first used. It will be called once at most.
+ *
+ * If non-NULL, the filter's `initialize` callback will be invoked right
+ * before the first use of the filter, so you can defer expensive
+ * initialization operations (in case libgit2 is being used in a way that
+ * doesn't need the filter).
+ */
+typedef int (*git_filter_init_fn)(git_filter *self);
+
+/**
+ * Shutdown callback on filter
+ *
+ * Specified as `filter.shutdown`, this is an optional callback invoked
+ * when the filter is unregistered or when libgit2 is shutting down. It
+ * will be called once at most and should release resources as needed.
+ *
+ * Typically this function will free the `git_filter` object itself.
+ */
+typedef void (*git_filter_shutdown_fn)(git_filter *self);
+
+/**
+ * Callback to decide if a given source needs this filter
+ *
+ * Specified as `filter.check`, this is an optional callback that checks
+ * if filtering is needed for a given source.
+ *
+ * It should return 0 if the filter should be applied (i.e. success),
+ * GIT_PASSTHROUGH if the filter should not be applied, or an error code
+ * to fail out of the filter processing pipeline and return to the caller.
+ *
+ * The `attr_values` will be set to the values of any attributes given in
+ * the filter definition. See `git_filter` below for more detail.
+ *
+ * The `payload` will be a pointer to a reference payload for the filter.
+ * This will start as NULL, but `check` can assign to this pointer for
+ * later use by the `apply` callback. Note that the value should be heap
+ * allocated (not stack), so that it doesn't go away before the `apply`
+ * callback can use it. If a filter allocates and assigns a value to the
+ * `payload`, it will need a `cleanup` callback to free the payload.
+ */
+typedef int (*git_filter_check_fn)(
+ git_filter *self,
+ void **payload, /* points to NULL ptr on entry, may be set */
+ const git_filter_source *src,
+ const char **attr_values);
+
+/**
+ * Callback to actually perform the data filtering
+ *
+ * Specified as `filter.apply`, this is the callback that actually filters
+ * data. If it successfully writes the output, it should return 0. Like
+ * `check`, it can return GIT_PASSTHROUGH to indicate that the filter
+ * doesn't want to run. Other error codes will stop filter processing and
+ * return to the caller.
+ *
+ * The `payload` value will refer to any payload that was set by the
+ * `check` callback. It may be read from or written to as needed.
+ */
+typedef int (*git_filter_apply_fn)(
+ git_filter *self,
+ void **payload, /* may be read and/or set */
+ git_buf *to,
+ const git_buf *from,
+ const git_filter_source *src);
+
+/**
+ * Callback to clean up after filtering has been applied
+ *
+ * Specified as `filter.cleanup`, this is an optional callback invoked
+ * after the filter has been applied. If the `check` or `apply` callbacks
+ * allocated a `payload` to keep per-source filter state, use this
+ * callback to free that payload and release resources as required.
+ */
+typedef void (*git_filter_cleanup_fn)(
+ git_filter *self,
+ void *payload);
+
+/**
+ * Filter structure used to register custom filters.
+ *
+ * To associate extra data with a filter, allocate extra data and put the
+ * `git_filter` struct at the start of your data buffer, then cast the
+ * `self` pointer to your larger structure when your callback is invoked.
+ *
+ * `version` should be set to GIT_FILTER_VERSION
+ *
+ * `attributes` is a whitespace-separated list of attribute names to check
+ * for this filter (e.g. "eol crlf text"). If the attribute name is bare,
+ * it will be simply loaded and passed to the `check` callback. If it has
+ * a value (i.e. "name=value"), the attribute must match that value for
+ * the filter to be applied.
+ *
+ * The `initialize`, `shutdown`, `check`, `apply`, and `cleanup` callbacks
+ * are all documented above with the respective function pointer typedefs.
+ */
+struct git_filter {
+ unsigned int version;
+
+ const char *attributes;
+
+ git_filter_init_fn initialize;
+ git_filter_shutdown_fn shutdown;
+ git_filter_check_fn check;
+ git_filter_apply_fn apply;
+ git_filter_cleanup_fn cleanup;
+};
+
+#define GIT_FILTER_VERSION 1
+
+/**
+ * Register a filter under a given name with a given priority.
+ *
+ * As mentioned elsewhere, the initialize callback will not be invoked
+ * immediately. It is deferred until the filter is used in some way.
+ *
+ * A filter's attribute checks and `check` and `apply` callbacks will be
+ * issued in order of `priority` on smudge (to workdir), and in reverse
+ * order of `priority` on clean (to odb).
+ *
+ * Two filters are preregistered with libgit2:
+ * - GIT_FILTER_CRLF with priority 0
+ * - GIT_FILTER_IDENT with priority 100
+ *
+ * Currently the filter registry is not thread safe, so any registering or
+ * deregistering of filters must be done outside of any possible usage of
+ * the filters (i.e. during application setup or shutdown).
+ *
+ * @param name A name by which the filter can be referenced. Attempting
+ * to register with an in-use name will return GIT_EEXISTS.
+ * @param filter The filter definition. This pointer will be stored as is
+ * by libgit2 so it must be a durable allocation (either static
+ * or on the heap).
+ * @param priority The priority for filter application
+ * @return 0 on successful registry, error code <0 on failure
+ */
+GIT_EXTERN(int) git_filter_register(
+ const char *name, git_filter *filter, int priority);
+
+/**
+ * Remove the filter with the given name
+ *
+ * Attempting to remove the builtin libgit2 filters is not permitted and
+ * will return an error.
+ *
+ * Currently the filter registry is not thread safe, so any registering or
+ * deregistering of filters must be done outside of any possible usage of
+ * the filters (i.e. during application setup or shutdown).
+ *
+ * @param name The name under which the filter was registered
+ * @return 0 on success, error code <0 on failure
+ */
+GIT_EXTERN(int) git_filter_unregister(const char *name);
+
+/** @} */
+GIT_END_DECL
+#endif