diff options
| author | Vicent Martà <vicent@github.com> | 2013-09-17 09:57:55 -0700 |
|---|---|---|
| committer | Vicent Martà <vicent@github.com> | 2013-09-17 09:57:55 -0700 |
| commit | bb371b62e950e3307d3acf2f772495a60565d266 (patch) | |
| tree | b4479ec3ad261bcac13493ee3f5ad45d15dfdda2 /include | |
| parent | 4581f9d8ab72e9b97817e1eaa7154bcec1c7f0b1 (diff) | |
| parent | f60ed4e6495b8bf68d0604335672e6f300330b3b (diff) | |
| download | libgit2-bb371b62e950e3307d3acf2f772495a60565d266.tar.gz | |
Merge pull request #1847 from libgit2/filters-alternative
Alternative proposal for filter API
Diffstat (limited to 'include')
| -rw-r--r-- | include/git2.h | 3 | ||||
| -rw-r--r-- | include/git2/blob.h | 32 | ||||
| -rw-r--r-- | include/git2/buffer.h | 112 | ||||
| -rw-r--r-- | include/git2/errors.h | 1 | ||||
| -rw-r--r-- | include/git2/filter.h | 142 | ||||
| -rw-r--r-- | include/git2/sys/filter.h | 292 |
6 files changed, 582 insertions, 0 deletions
diff --git a/include/git2.h b/include/git2.h index e8638a830..73c11ad83 100644 --- a/include/git2.h +++ b/include/git2.h @@ -58,4 +58,7 @@ #include "git2/stash.h" #include "git2/pathspec.h" +#include "git2/buffer.h" +#include "git2/filter.h" + #endif diff --git a/include/git2/blob.h b/include/git2/blob.h index 8fca48966..dcab4fbe0 100644 --- a/include/git2/blob.h +++ b/include/git2/blob.h @@ -11,6 +11,7 @@ #include "types.h" #include "oid.h" #include "object.h" +#include "buffer.h" /** * @file git2/blob.h @@ -96,6 +97,37 @@ GIT_EXTERN(const void *) git_blob_rawcontent(const git_blob *blob); GIT_EXTERN(git_off_t) git_blob_rawsize(const git_blob *blob); /** + * Get a buffer with the filtered content of a blob. + * + * This applies filters as if the blob was being checked out to the + * working directory under the specified filename. This may apply + * CRLF filtering or other types of changes depending on the file + * attributes set for the blob and the content detected in it. + * + * The output is written into a `git_buf` which the caller must free + * when done (via `git_buf_free`). + * + * If no filters need to be applied, then the `out` buffer will just be + * populated with a pointer to the raw content of the blob. In that case, + * be careful to *not* free the blob until done with the buffer. To keep + * the data detached from the blob, call `git_buf_grow` on the buffer + * with a `want_size` of 0 and the buffer will be reallocated to be + * detached from the blob. + * + * @param out The git_buf to be filled in + * @param blob Pointer to the blob + * @param as_path Path used for file attribute lookups, etc. + * @param check_for_binary_data Should this test if blob content contains + * NUL bytes / looks like binary data before applying filters? + * @return 0 on success or an error code + */ +GIT_EXTERN(int) git_blob_filtered_content( + git_buf *out, + git_blob *blob, + const char *as_path, + int check_for_binary_data); + +/** * Read a file from the working folder of a repository * and write it to the Object Database as a loose blob * diff --git a/include/git2/buffer.h b/include/git2/buffer.h new file mode 100644 index 000000000..36a61e6c9 --- /dev/null +++ b/include/git2/buffer.h @@ -0,0 +1,112 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_git_buf_h__ +#define INCLUDE_git_buf_h__ + +#include "common.h" + +/** + * @file git2/buffer.h + * @brief Buffer export structure + * + * @ingroup Git + * @{ + */ +GIT_BEGIN_DECL + +/** + * A data buffer for exporting data from libgit2 + * + * Sometimes libgit2 wants to return an allocated data buffer to the + * caller and have the caller take responsibility for freeing that memory. + * This can be awkward if the caller does not have easy access to the same + * allocation functions that libgit2 is using. In those cases, libgit2 + * will fill in a `git_buf` and the caller can use `git_buf_free()` to + * release it when they are done. + * + * A `git_buf` may also be used for the caller to pass in a reference to + * a block of memory they hold. In this case, libgit2 will not resize or + * free the memory, but will read from it as needed. + * + * A `git_buf` is a public structure with three fields: + * + * - `ptr` points to the start of the allocated memory. If it is NULL, + * then the `git_buf` is considered empty and libgit2 will feel free + * to overwrite it with new data. + * + * - `size` holds the size (in bytes) of the data that is actually used. + * + * - `asize` holds the known total amount of allocated memory if the `ptr` + * was allocated by libgit2. It may be larger than `size`. If `ptr` + * was not allocated by libgit2 and should not be resized and/or freed, + * then `asize` will be set to zero. + * + * Some APIs may occasionally do something slightly unusual with a buffer, + * such as setting `ptr` to a value that was passed in by the user. In + * those cases, the behavior will be clearly documented by the API. + */ +typedef struct { + char *ptr; + size_t asize, size; +} git_buf; + +/** + * Static initializer for git_buf from static buffer + */ +#define GIT_BUF_INIT_CONST(STR,LEN) { (char *)(STR), 0, (size_t)(LEN) } + +/** + * Free the memory referred to by the git_buf. + * + * Note that this does not free the `git_buf` itself, just the memory + * pointed to by `buffer->ptr`. This will not free the memory if it looks + * like it was not allocated internally, but it will clear the buffer back + * to the empty state. + * + * @param buffer The buffer to deallocate + */ +GIT_EXTERN(void) git_buf_free(git_buf *buffer); + +/** + * Resize the buffer allocation to make more space. + * + * This will attempt to grow the buffer to accomodate the target size. + * + * If the buffer refers to memory that was not allocated by libgit2 (i.e. + * the `asize` field is zero), then `ptr` will be replaced with a newly + * allocated block of data. Be careful so that memory allocated by the + * caller is not lost. As a special variant, if you pass `target_size` as + * 0 and the memory is not allocated by libgit2, this will allocate a new + * buffer of size `size` and copy the external data into it. + * + * Currently, this will never shrink a buffer, only expand it. + * + * If the allocation fails, this will return an error and the buffer will be + * marked as invalid for future operations, invaliding the contents. + * + * @param buffer The buffer to be resized; may or may not be allocated yet + * @param target_size The desired available size + * @return 0 on success, -1 on allocation failure + */ +GIT_EXTERN(int) git_buf_grow(git_buf *buffer, size_t target_size); + +/** + * Set buffer to a copy of some raw data. + * + * @param buffer The buffer to set + * @param data The data to copy into the buffer + * @param datalen The length of the data to copy into the buffer + * @return 0 on success, -1 on allocation failure + */ +GIT_EXTERN(int) git_buf_set( + git_buf *buffer, const void *data, size_t datalen); + +GIT_END_DECL + +/** @} */ + +#endif diff --git a/include/git2/errors.h b/include/git2/errors.h index dc4486ade..a454ac956 100644 --- a/include/git2/errors.h +++ b/include/git2/errors.h @@ -68,6 +68,7 @@ typedef enum { GITERR_FETCHHEAD, GITERR_MERGE, GITERR_SSH, + GITERR_FILTER, } git_error_t; /** diff --git a/include/git2/filter.h b/include/git2/filter.h new file mode 100644 index 000000000..f96b6766b --- /dev/null +++ b/include/git2/filter.h @@ -0,0 +1,142 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_git_filter_h__ +#define INCLUDE_git_filter_h__ + +#include "common.h" +#include "types.h" +#include "oid.h" +#include "buffer.h" + +/** + * @file git2/filter.h + * @brief Git filter APIs + * + * @ingroup Git + * @{ + */ +GIT_BEGIN_DECL + +/** + * Filters are applied in one of two directions: smudging - which is + * exporting a file from the Git object database to the working directory, + * and cleaning - which is importing a file from the working directory to + * the Git object database. These values control which direction of + * change is being applied. + */ +typedef enum { + GIT_FILTER_TO_WORKTREE = 0, + GIT_FILTER_SMUDGE = GIT_FILTER_TO_WORKTREE, + GIT_FILTER_TO_ODB = 1, + GIT_FILTER_CLEAN = GIT_FILTER_TO_ODB, +} git_filter_mode_t; + +/** + * A filter that can transform file data + * + * This represents a filter that can be used to transform or even replace + * file data. Libgit2 includes one built in filter and it is possible to + * write your own (see git2/sys/filter.h for information on that). + * + * The two builtin filters are: + * + * * "crlf" which uses the complex rules with the "text", "eol", and + * "crlf" file attributes to decide how to convert between LF and CRLF + * line endings + * * "ident" which replaces "$Id$" in a blob with "$Id: <blob OID>$" upon + * checkout and replaced "$Id: <anything>$" with "$Id$" on checkin. + */ +typedef struct git_filter git_filter; + +/** + * List of filters to be applied + * + * This represents a list of filters to be applied to a file / blob. You + * can build the list with one call, apply it with another, and dispose it + * with a third. In typical usage, there are not many occasions where a + * git_filter_list is needed directly since the library will generally + * handle conversions for you, but it can be convenient to be able to + * build and apply the list sometimes. + */ +typedef struct git_filter_list git_filter_list; + +/** + * Load the filter list for a given path. + * + * This will return 0 (success) but set the output git_filter_list to NULL + * if no filters are requested for the given file. + * + * @param filters Output newly created git_filter_list (or NULL) + * @param repo Repository object that contains `path` + * @param blob The blob to which the filter will be applied (if known) + * @param path Relative path of the file to be filtered + * @param mode Filtering direction (WT->ODB or ODB->WT) + * @return 0 on success (which could still return NULL if no filters are + * needed for the requested file), <0 on error + */ +GIT_EXTERN(int) git_filter_list_load( + git_filter_list **filters, + git_repository *repo, + git_blob *blob, /* can be NULL */ + const char *path, + git_filter_mode_t mode); + +/** + * Apply filter list to a data buffer. + * + * See `git2/buffer.h` for background on `git_buf` objects. + * + * If the `in` buffer holds data allocated by libgit2 (i.e. `in->asize` is + * not zero), then it will be overwritten when applying the filters. If + * not, then it will be left untouched. + * + * If there are no filters to apply (or `filters` is NULL), then the `out` + * buffer will reference the `in` buffer data (with `asize` set to zero) + * instead of allocating data. This keeps allocations to a minimum, but + * it means you have to be careful about freeing the `in` data since `out` + * may be pointing to it! + * + * @param out Buffer to store the result of the filtering + * @param filters A loaded git_filter_list (or NULL) + * @param in Buffer containing the data to filter + * @return 0 on success, an error code otherwise + */ +GIT_EXTERN(int) git_filter_list_apply_to_data( + git_buf *out, + git_filter_list *filters, + git_buf *in); + +/** + * Apply filter list to the contents of a file on disk + */ +GIT_EXTERN(int) git_filter_list_apply_to_file( + git_buf *out, + git_filter_list *filters, + git_repository *repo, + const char *path); + +/** + * Apply filter list to the contents of a blob + */ +GIT_EXTERN(int) git_filter_list_apply_to_blob( + git_buf *out, + git_filter_list *filters, + git_blob *blob); + +/** + * Free a git_filter_list + * + * @param filters A git_filter_list created by `git_filter_list_load` + */ +GIT_EXTERN(void) git_filter_list_free(git_filter_list *filters); + + +GIT_END_DECL + +/** @} */ + +#endif diff --git a/include/git2/sys/filter.h b/include/git2/sys/filter.h new file mode 100644 index 000000000..94ad3aed4 --- /dev/null +++ b/include/git2/sys/filter.h @@ -0,0 +1,292 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_sys_git_filter_h__ +#define INCLUDE_sys_git_filter_h__ + +#include "git2/filter.h" + +/** + * @file git2/sys/filter.h + * @brief Git filter backend and plugin routines + * @defgroup git_backend Git custom backend APIs + * @ingroup Git + * @{ + */ +GIT_BEGIN_DECL + +/** + * Look up a filter by name + * + * @param name The name of the filter + * @return Pointer to the filter object or NULL if not found + */ +GIT_EXTERN(git_filter *) git_filter_lookup(const char *name); + +#define GIT_FILTER_CRLF "crlf" +#define GIT_FILTER_IDENT "ident" + +/** + * This is priority that the internal CRLF filter will be registered with + */ +#define GIT_FILTER_CRLF_PRIORITY 0 + +/** + * This is priority that the internal ident filter will be registered with + */ +#define GIT_FILTER_IDENT_PRIORITY 100 + +/** + * This is priority to use with a custom filter to imitate a core Git + * filter driver, so that it will be run last on checkout and first on + * checkin. You do not have to use this, but it helps compatibility. + */ +#define GIT_FILTER_DRIVER_PRIORITY 200 + +/** + * Create a new empty filter list + * + * Normally you won't use this because `git_filter_list_load` will create + * the filter list for you, but you can use this in combination with the + * `git_filter_lookup` and `git_filter_list_push` functions to assemble + * your own chains of filters. + */ +GIT_EXTERN(int) git_filter_list_new( + git_filter_list **out, git_repository *repo, git_filter_mode_t mode); + +/** + * Add a filter to a filter list with the given payload. + * + * Normally you won't have to do this because the filter list is created + * by calling the "check" function on registered filters when the filter + * attributes are set, but this does allow more direct manipulation of + * filter lists when desired. + * + * Note that normally the "check" function can set up a payload for the + * filter. Using this function, you can either pass in a payload if you + * know the expected payload format, or you can pass NULL. Some filters + * may fail with a NULL payload. Good luck! + */ +GIT_EXTERN(int) git_filter_list_push( + git_filter_list *fl, git_filter *filter, void *payload); + +/** + * Look up how many filters are in the list + * + * We will attempt to apply all of these filters to any data passed in, + * but note that the filter apply action still has the option of skipping + * data that is passed in (for example, the CRLF filter will skip data + * that appears to be binary). + * + * @param fl A filter list + * @return The number of filters in the list + */ +GIT_EXTERN(size_t) git_filter_list_length(const git_filter_list *fl); + +/** + * A filter source represents a file/blob to be processed + */ +typedef struct git_filter_source git_filter_source; + +/** + * Get the repository that the source data is coming from. + */ +GIT_EXTERN(git_repository *) git_filter_source_repo(const git_filter_source *src); + +/** + * Get the path that the source data is coming from. + */ +GIT_EXTERN(const char *) git_filter_source_path(const git_filter_source *src); + +/** + * Get the file mode of the source file + * If the mode is unknown, this will return 0 + */ +GIT_EXTERN(uint16_t) git_filter_source_filemode(const git_filter_source *src); + +/** + * Get the OID of the source + * If the OID is unknown (often the case with GIT_FILTER_CLEAN) then + * this will return NULL. + */ +GIT_EXTERN(const git_oid *) git_filter_source_id(const git_filter_source *src); + +/** + * Get the git_filter_mode_t to be applied + */ +GIT_EXTERN(git_filter_mode_t) git_filter_source_mode(const git_filter_source *src); + +/* + * struct git_filter + * + * The filter lifecycle: + * - initialize - first use of filter + * - shutdown - filter removed/unregistered from system + * - check - considering filter for file + * - apply - apply filter to file contents + * - cleanup - done with file + */ + +/** + * Initialize callback on filter + * + * Specified as `filter.initialize`, this is an optional callback invoked + * before a filter is first used. It will be called once at most. + * + * If non-NULL, the filter's `initialize` callback will be invoked right + * before the first use of the filter, so you can defer expensive + * initialization operations (in case libgit2 is being used in a way that + * doesn't need the filter). + */ +typedef int (*git_filter_init_fn)(git_filter *self); + +/** + * Shutdown callback on filter + * + * Specified as `filter.shutdown`, this is an optional callback invoked + * when the filter is unregistered or when libgit2 is shutting down. It + * will be called once at most and should release resources as needed. + * + * Typically this function will free the `git_filter` object itself. + */ +typedef void (*git_filter_shutdown_fn)(git_filter *self); + +/** + * Callback to decide if a given source needs this filter + * + * Specified as `filter.check`, this is an optional callback that checks + * if filtering is needed for a given source. + * + * It should return 0 if the filter should be applied (i.e. success), + * GIT_PASSTHROUGH if the filter should not be applied, or an error code + * to fail out of the filter processing pipeline and return to the caller. + * + * The `attr_values` will be set to the values of any attributes given in + * the filter definition. See `git_filter` below for more detail. + * + * The `payload` will be a pointer to a reference payload for the filter. + * This will start as NULL, but `check` can assign to this pointer for + * later use by the `apply` callback. Note that the value should be heap + * allocated (not stack), so that it doesn't go away before the `apply` + * callback can use it. If a filter allocates and assigns a value to the + * `payload`, it will need a `cleanup` callback to free the payload. + */ +typedef int (*git_filter_check_fn)( + git_filter *self, + void **payload, /* points to NULL ptr on entry, may be set */ + const git_filter_source *src, + const char **attr_values); + +/** + * Callback to actually perform the data filtering + * + * Specified as `filter.apply`, this is the callback that actually filters + * data. If it successfully writes the output, it should return 0. Like + * `check`, it can return GIT_PASSTHROUGH to indicate that the filter + * doesn't want to run. Other error codes will stop filter processing and + * return to the caller. + * + * The `payload` value will refer to any payload that was set by the + * `check` callback. It may be read from or written to as needed. + */ +typedef int (*git_filter_apply_fn)( + git_filter *self, + void **payload, /* may be read and/or set */ + git_buf *to, + const git_buf *from, + const git_filter_source *src); + +/** + * Callback to clean up after filtering has been applied + * + * Specified as `filter.cleanup`, this is an optional callback invoked + * after the filter has been applied. If the `check` or `apply` callbacks + * allocated a `payload` to keep per-source filter state, use this + * callback to free that payload and release resources as required. + */ +typedef void (*git_filter_cleanup_fn)( + git_filter *self, + void *payload); + +/** + * Filter structure used to register custom filters. + * + * To associate extra data with a filter, allocate extra data and put the + * `git_filter` struct at the start of your data buffer, then cast the + * `self` pointer to your larger structure when your callback is invoked. + * + * `version` should be set to GIT_FILTER_VERSION + * + * `attributes` is a whitespace-separated list of attribute names to check + * for this filter (e.g. "eol crlf text"). If the attribute name is bare, + * it will be simply loaded and passed to the `check` callback. If it has + * a value (i.e. "name=value"), the attribute must match that value for + * the filter to be applied. + * + * The `initialize`, `shutdown`, `check`, `apply`, and `cleanup` callbacks + * are all documented above with the respective function pointer typedefs. + */ +struct git_filter { + unsigned int version; + + const char *attributes; + + git_filter_init_fn initialize; + git_filter_shutdown_fn shutdown; + git_filter_check_fn check; + git_filter_apply_fn apply; + git_filter_cleanup_fn cleanup; +}; + +#define GIT_FILTER_VERSION 1 + +/** + * Register a filter under a given name with a given priority. + * + * As mentioned elsewhere, the initialize callback will not be invoked + * immediately. It is deferred until the filter is used in some way. + * + * A filter's attribute checks and `check` and `apply` callbacks will be + * issued in order of `priority` on smudge (to workdir), and in reverse + * order of `priority` on clean (to odb). + * + * Two filters are preregistered with libgit2: + * - GIT_FILTER_CRLF with priority 0 + * - GIT_FILTER_IDENT with priority 100 + * + * Currently the filter registry is not thread safe, so any registering or + * deregistering of filters must be done outside of any possible usage of + * the filters (i.e. during application setup or shutdown). + * + * @param name A name by which the filter can be referenced. Attempting + * to register with an in-use name will return GIT_EEXISTS. + * @param filter The filter definition. This pointer will be stored as is + * by libgit2 so it must be a durable allocation (either static + * or on the heap). + * @param priority The priority for filter application + * @return 0 on successful registry, error code <0 on failure + */ +GIT_EXTERN(int) git_filter_register( + const char *name, git_filter *filter, int priority); + +/** + * Remove the filter with the given name + * + * Attempting to remove the builtin libgit2 filters is not permitted and + * will return an error. + * + * Currently the filter registry is not thread safe, so any registering or + * deregistering of filters must be done outside of any possible usage of + * the filters (i.e. during application setup or shutdown). + * + * @param name The name under which the filter was registered + * @return 0 on success, error code <0 on failure + */ +GIT_EXTERN(int) git_filter_unregister(const char *name); + +/** @} */ +GIT_END_DECL +#endif |
