summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <ps@pks.im>2017-11-11 17:12:31 +0000
committerPatrick Steinhardt <ps@pks.im>2017-11-11 17:12:31 +0000
commit1953c68b7550626b4ca6c37294d8d18479222143 (patch)
treedcd08f0b106693bf61803049eff41a137f818bf0
parent7bdfc0a68506709b05c40ac519ded044a5ad7124 (diff)
downloadlibgit2-1953c68b7550626b4ca6c37294d8d18479222143.tar.gz
config_file: split out module to parse config files
The configuration file code grew quite big and intermingles the actual configuration logic with the parsing logic for the configuration syntax. This makes it hard to refactor the parsing logic on its own and convert it to make use of our new parsing context module. Refactor the code and split it up into two parts. The config file code will only take care of the actual handling of configuration files, includes and writing new files. The newly created config parser module is then only responsible for parsing the actual contents of a configuration file, leaving everything else to the callbacks passed to its entry point `git_config_parse`.
-rw-r--r--src/config_file.c707
-rw-r--r--src/config_parse.c658
-rw-r--r--src/config_parse.h62
3 files changed, 746 insertions, 681 deletions
diff --git a/src/config_file.c b/src/config_file.c
index cc4e7b3b7..8765259b3 100644
--- a/src/config_file.c
+++ b/src/config_file.c
@@ -17,6 +17,7 @@
#include "git2/types.h"
#include "strmap.h"
#include "array.h"
+#include "config_parse.h"
#include <ctype.h>
#include <sys/types.h>
@@ -75,20 +76,6 @@ typedef struct git_config_file_iter {
(iter) && (((tmp) = CVAR_LIST_NEXT(iter) || 1));\
(iter) = (tmp))
-struct config_file {
- git_oid checksum;
- char *path;
- git_array_t(struct config_file) includes;
-};
-
-struct reader {
- struct config_file *file;
- git_buf buffer;
- char *read_ptr;
- int line_number;
- int eof;
-};
-
typedef struct {
git_atomic refcount;
git_strmap *values;
@@ -107,6 +94,8 @@ typedef struct {
git_config_level_t level;
const git_repository *repo;
+ git_array_t(git_config_parser) readers;
+
bool locked;
git_filebuf locked_buf;
git_buf locked_content;
@@ -120,19 +109,13 @@ typedef struct {
diskfile_backend *snapshot_from;
} diskfile_readonly_backend;
-static int config_read(git_strmap *values, const git_repository *repo, struct config_file *file, git_config_level_t level, int depth);
+static int config_read(git_strmap *values, const git_repository *repo, git_config_file *file, git_config_level_t level, int depth);
static int config_write(diskfile_backend *cfg, const char *orig_key, const char *key, const regex_t *preg, const char *value);
static char *escape_value(const char *ptr);
int git_config_file__snapshot(git_config_backend **out, diskfile_backend *in);
static int config_snapshot(git_config_backend **out, git_config_backend *in);
-static void set_parse_error(struct reader *reader, int col, const char *error_str)
-{
- giterr_set(GITERR_CONFIG, "failed to parse config file: %s (in %s:%d, column %d)",
- error_str, reader->file->path, reader->line_number, col);
-}
-
static int config_error_readonly(void)
{
giterr_set(GITERR_CONFIG, "this backend is read-only");
@@ -293,7 +276,6 @@ static int config_open(git_config_backend *cfg, git_config_level_t level, const
if ((res = refcounted_strmap_alloc(&b->header.values)) < 0)
return res;
- /* It's fine if the file doesn't exist */
if (!git_path_exists(b->file.path))
return 0;
@@ -307,11 +289,11 @@ static int config_open(git_config_backend *cfg, git_config_level_t level, const
static int config_is_modified(int *modified, struct config_file *file)
{
- struct config_file *include;
+ git_config_file *include;
git_buf buf = GIT_BUF_INIT;
git_oid hash;
uint32_t i;
- int error;
+ int error = 0;
*modified = 0;
@@ -341,7 +323,7 @@ static int config_refresh(git_config_backend *cfg)
{
diskfile_backend *b = (diskfile_backend *)cfg;
refcounted_strmap *values = NULL, *tmp;
- struct config_file *include;
+ git_config_file *include;
int error, modified;
uint32_t i;
@@ -885,397 +867,6 @@ int git_config_file__snapshot(git_config_backend **out, diskfile_backend *in)
return 0;
}
-static int reader_getchar_raw(struct reader *reader)
-{
- int c;
-
- c = *reader->read_ptr++;
-
- /*
- Win 32 line breaks: if we find a \r\n sequence,
- return only the \n as a newline
- */
- if (c == '\r' && *reader->read_ptr == '\n') {
- reader->read_ptr++;
- c = '\n';
- }
-
- if (c == '\n')
- reader->line_number++;
-
- if (c == 0) {
- reader->eof = 1;
- c = '\0';
- }
-
- return c;
-}
-
-#define SKIP_WHITESPACE (1 << 1)
-#define SKIP_COMMENTS (1 << 2)
-
-static int reader_getchar(struct reader *reader, int flags)
-{
- const int skip_whitespace = (flags & SKIP_WHITESPACE);
- const int skip_comments = (flags & SKIP_COMMENTS);
- int c;
-
- assert(reader->read_ptr);
-
- do {
- c = reader_getchar_raw(reader);
- } while (c != '\n' && c != '\0' && skip_whitespace && git__isspace(c));
-
- if (skip_comments && (c == '#' || c == ';')) {
- do {
- c = reader_getchar_raw(reader);
- } while (c != '\n' && c != '\0');
- }
-
- return c;
-}
-
-/*
- * Read the next char, but don't move the reading pointer.
- */
-static int reader_peek(struct reader *reader, int flags)
-{
- void *old_read_ptr;
- int old_lineno, old_eof;
- int ret;
-
- assert(reader->read_ptr);
-
- old_read_ptr = reader->read_ptr;
- old_lineno = reader->line_number;
- old_eof = reader->eof;
-
- ret = reader_getchar(reader, flags);
-
- reader->read_ptr = old_read_ptr;
- reader->line_number = old_lineno;
- reader->eof = old_eof;
-
- return ret;
-}
-
-/*
- * Read and consume a line, returning it in newly-allocated memory.
- */
-static char *reader_readline(struct reader *reader, bool skip_whitespace)
-{
- char *line = NULL;
- char *line_src, *line_end;
- size_t line_len, alloc_len;
-
- line_src = reader->read_ptr;
-
- if (skip_whitespace) {
- /* Skip empty empty lines */
- while (git__isspace(*line_src))
- ++line_src;
- }
-
- line_end = strchr(line_src, '\n');
-
- /* no newline at EOF */
- if (line_end == NULL)
- line_end = strchr(line_src, 0);
-
- line_len = line_end - line_src;
-
- if (GIT_ADD_SIZET_OVERFLOW(&alloc_len, line_len, 1) ||
- (line = git__malloc(alloc_len)) == NULL) {
- return NULL;
- }
-
- memcpy(line, line_src, line_len);
-
- do line[line_len] = '\0';
- while (line_len-- > 0 && git__isspace(line[line_len]));
-
- if (*line_end == '\n')
- line_end++;
-
- if (*line_end == '\0')
- reader->eof = 1;
-
- reader->line_number++;
- reader->read_ptr = line_end;
-
- return line;
-}
-
-/*
- * Consume a line, without storing it anywhere
- */
-static void reader_consume_line(struct reader *reader)
-{
- char *line_start, *line_end;
-
- line_start = reader->read_ptr;
- line_end = strchr(line_start, '\n');
- /* No newline at EOF */
- if(line_end == NULL){
- line_end = strchr(line_start, '\0');
- }
-
- if (*line_end == '\n')
- line_end++;
-
- if (*line_end == '\0')
- reader->eof = 1;
-
- reader->line_number++;
- reader->read_ptr = line_end;
-}
-
-GIT_INLINE(int) config_keychar(int c)
-{
- return isalnum(c) || c == '-';
-}
-
-static int parse_section_header_ext(struct reader *reader, const char *line, const char *base_name, char **section_name)
-{
- int c, rpos;
- char *first_quote, *last_quote;
- git_buf buf = GIT_BUF_INIT;
- size_t quoted_len, alloc_len, base_name_len = strlen(base_name);
-
- /*
- * base_name is what came before the space. We should be at the
- * first quotation mark, except for now, line isn't being kept in
- * sync so we only really use it to calculate the length.
- */
-
- first_quote = strchr(line, '"');
- if (first_quote == NULL) {
- set_parse_error(reader, 0, "Missing quotation marks in section header");
- goto end_error;
- }
-
- last_quote = strrchr(line, '"');
- quoted_len = last_quote - first_quote;
-
- if (quoted_len == 0) {
- set_parse_error(reader, 0, "Missing closing quotation mark in section header");
- goto end_error;
- }
-
- GITERR_CHECK_ALLOC_ADD(&alloc_len, base_name_len, quoted_len);
- GITERR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 2);
-
- if (git_buf_grow(&buf, alloc_len) < 0 ||
- git_buf_printf(&buf, "%s.", base_name) < 0)
- goto end_error;
-
- rpos = 0;
-
- line = first_quote;
- c = line[++rpos];
-
- /*
- * At the end of each iteration, whatever is stored in c will be
- * added to the string. In case of error, jump to out
- */
- do {
-
- switch (c) {
- case 0:
- set_parse_error(reader, 0, "Unexpected end-of-line in section header");
- goto end_error;
-
- case '"':
- goto end_parse;
-
- case '\\':
- c = line[++rpos];
-
- if (c == 0) {
- set_parse_error(reader, rpos, "Unexpected end-of-line in section header");
- goto end_error;
- }
-
- default:
- break;
- }
-
- git_buf_putc(&buf, (char)c);
- c = line[++rpos];
- } while (line + rpos < last_quote);
-
-end_parse:
- if (git_buf_oom(&buf))
- goto end_error;
-
- if (line[rpos] != '"' || line[rpos + 1] != ']') {
- set_parse_error(reader, rpos, "Unexpected text after closing quotes");
- git_buf_free(&buf);
- return -1;
- }
-
- *section_name = git_buf_detach(&buf);
- return 0;
-
-end_error:
- git_buf_free(&buf);
-
- return -1;
-}
-
-static int parse_section_header(struct reader *reader, char **section_out)
-{
- char *name, *name_end;
- int name_length, c, pos;
- int result;
- char *line;
- size_t line_len;
-
- line = reader_readline(reader, true);
- if (line == NULL)
- return -1;
-
- /* find the end of the variable's name */
- name_end = strrchr(line, ']');
- if (name_end == NULL) {
- git__free(line);
- set_parse_error(reader, 0, "Missing ']' in section header");
- return -1;
- }
-
- GITERR_CHECK_ALLOC_ADD(&line_len, (size_t)(name_end - line), 1);
- name = git__malloc(line_len);
- GITERR_CHECK_ALLOC(name);
-
- name_length = 0;
- pos = 0;
-
- /* Make sure we were given a section header */
- c = line[pos++];
- assert(c == '[');
-
- c = line[pos++];
-
- do {
- if (git__isspace(c)){
- name[name_length] = '\0';
- result = parse_section_header_ext(reader, line, name, section_out);
- git__free(line);
- git__free(name);
- return result;
- }
-
- if (!config_keychar(c) && c != '.') {
- set_parse_error(reader, pos, "Unexpected character in header");
- goto fail_parse;
- }
-
- name[name_length++] = (char)git__tolower(c);
-
- } while ((c = line[pos++]) != ']');
-
- if (line[pos - 1] != ']') {
- set_parse_error(reader, pos, "Unexpected end of file");
- goto fail_parse;
- }
-
- git__free(line);
-
- name[name_length] = 0;
- *section_out = name;
-
- return 0;
-
-fail_parse:
- git__free(line);
- git__free(name);
- return -1;
-}
-
-static int skip_bom(struct reader *reader)
-{
- git_bom_t bom;
- int bom_offset = git_buf_text_detect_bom(&bom,
- &reader->buffer, reader->read_ptr - reader->buffer.ptr);
-
- if (bom == GIT_BOM_UTF8)
- reader->read_ptr += bom_offset;
-
- /* TODO: reference implementation is pretty stupid with BoM */
-
- return 0;
-}
-
-/*
- (* basic types *)
- digit = "0".."9"
- integer = digit { digit }
- alphabet = "a".."z" + "A" .. "Z"
-
- section_char = alphabet | "." | "-"
- extension_char = (* any character except newline *)
- any_char = (* any character *)
- variable_char = "alphabet" | "-"
-
-
- (* actual grammar *)
- config = { section }
-
- section = header { definition }
-
- header = "[" section [subsection | subsection_ext] "]"
-
- subsection = "." section
- subsection_ext = "\"" extension "\""
-
- section = section_char { section_char }
- extension = extension_char { extension_char }
-
- definition = variable_name ["=" variable_value] "\n"
-
- variable_name = variable_char { variable_char }
- variable_value = string | boolean | integer
-
- string = quoted_string | plain_string
- quoted_string = "\"" plain_string "\""
- plain_string = { any_char }
-
- boolean = boolean_true | boolean_false
- boolean_true = "yes" | "1" | "true" | "on"
- boolean_false = "no" | "0" | "false" | "off"
-*/
-
-static int strip_comments(char *line, int in_quotes)
-{
- int quote_count = in_quotes, backslash_count = 0;
- char *ptr;
-
- for (ptr = line; *ptr; ++ptr) {
- if (ptr[0] == '"' && ptr > line && ptr[-1] != '\\')
- quote_count++;
-
- if ((ptr[0] == ';' || ptr[0] == '#') &&
- (quote_count % 2) == 0 &&
- (backslash_count % 2) == 0) {
- ptr[0] = '\0';
- break;
- }
-
- if (ptr[0] == '\\')
- backslash_count++;
- else
- backslash_count = 0;
- }
-
- /* skip any space at the end */
- while (ptr > line && git__isspace(ptr[-1])) {
- ptr--;
- }
- ptr[0] = '\0';
-
- return quote_count;
-}
-
static int included_path(git_buf *out, const char *dir, const char *path)
{
/* From the user's home */
@@ -1285,9 +876,6 @@ static int included_path(git_buf *out, const char *dir, const char *path)
return git_path_join_unrooted(out, path, dir, NULL);
}
-static const char *escapes = "ntb\"\\";
-static const char *escaped = "\n\t\b\"\\";
-
/* Escape the values to write them to the file */
static char *escape_value(const char *ptr)
{
@@ -1305,9 +893,9 @@ static char *escape_value(const char *ptr)
return NULL;
while (*ptr != '\0') {
- if ((esc = strchr(escaped, *ptr)) != NULL) {
+ if ((esc = strchr(git_config_escaped, *ptr)) != NULL) {
git_buf_putc(&buf, '\\');
- git_buf_putc(&buf, escapes[esc - escaped]);
+ git_buf_putc(&buf, git_config_escapes[esc - git_config_escaped]);
} else {
git_buf_putc(&buf, *ptr);
}
@@ -1322,254 +910,6 @@ static char *escape_value(const char *ptr)
return git_buf_detach(&buf);
}
-/* '\"' -> '"' etc */
-static int unescape_line(
- char **out, bool *is_multi, const char *ptr, int quote_count)
-{
- char *str, *fixed, *esc;
- size_t ptr_len = strlen(ptr), alloc_len;
-
- *is_multi = false;
-
- if (GIT_ADD_SIZET_OVERFLOW(&alloc_len, ptr_len, 1) ||
- (str = git__malloc(alloc_len)) == NULL) {
- return -1;
- }
-
- fixed = str;
-
- while (*ptr != '\0') {
- if (*ptr == '"') {
- quote_count++;
- } else if (*ptr != '\\') {
- *fixed++ = *ptr;
- } else {
- /* backslash, check the next char */
- ptr++;
- /* if we're at the end, it's a multiline, so keep the backslash */
- if (*ptr == '\0') {
- *is_multi = true;
- goto done;
- }
- if ((esc = strchr(escapes, *ptr)) != NULL) {
- *fixed++ = escaped[esc - escapes];
- } else {
- git__free(str);
- giterr_set(GITERR_CONFIG, "invalid escape at %s", ptr);
- return -1;
- }
- }
- ptr++;
- }
-
-done:
- *fixed = '\0';
- *out = str;
-
- return 0;
-}
-
-static int parse_multiline_variable(struct reader *reader, git_buf *value, int in_quotes)
-{
- char *line = NULL, *proc_line = NULL;
- int quote_count;
- bool multiline;
-
- /* Check that the next line exists */
- line = reader_readline(reader, false);
- if (line == NULL)
- return -1;
-
- /* We've reached the end of the file, there is no continuation.
- * (this is not an error).
- */
- if (line[0] == '\0') {
- git__free(line);
- return 0;
- }
-
- quote_count = strip_comments(line, !!in_quotes);
-
- /* If it was just a comment, pretend it didn't exist */
- if (line[0] == '\0') {
- git__free(line);
- return parse_multiline_variable(reader, value, quote_count);
- /* TODO: unbounded recursion. This **could** be exploitable */
- }
-
- if (unescape_line(&proc_line, &multiline, line, in_quotes) < 0) {
- git__free(line);
- return -1;
- }
- /* add this line to the multiline var */
-
- git_buf_puts(value, proc_line);
- git__free(line);
- git__free(proc_line);
-
- /*
- * If we need to continue reading the next line, let's just
- * keep putting stuff in the buffer
- */
- if (multiline)
- return parse_multiline_variable(reader, value, quote_count);
-
- return 0;
-}
-
-GIT_INLINE(bool) is_namechar(char c)
-{
- return isalnum(c) || c == '-';
-}
-
-static int parse_name(
- char **name, const char **value, struct reader *reader, const char *line)
-{
- const char *name_end = line, *value_start;
-
- *name = NULL;
- *value = NULL;
-
- while (*name_end && is_namechar(*name_end))
- name_end++;
-
- if (line == name_end) {
- set_parse_error(reader, 0, "Invalid configuration key");
- return -1;
- }
-
- value_start = name_end;
-
- while (*value_start && git__isspace(*value_start))
- value_start++;
-
- if (*value_start == '=') {
- *value = value_start + 1;
- } else if (*value_start) {
- set_parse_error(reader, 0, "Invalid configuration key");
- return -1;
- }
-
- if ((*name = git__strndup(line, name_end - line)) == NULL)
- return -1;
-
- return 0;
-}
-
-static int parse_variable(struct reader *reader, char **var_name, char **var_value)
-{
- const char *value_start = NULL;
- char *line;
- int quote_count;
- bool multiline;
-
- line = reader_readline(reader, true);
- if (line == NULL)
- return -1;
-
- quote_count = strip_comments(line, 0);
-
- /* If there is no value, boolean true is assumed */
- *var_value = NULL;
-
- if (parse_name(var_name, &value_start, reader, line) < 0)
- goto on_error;
-
- /*
- * Now, let's try to parse the value
- */
- if (value_start != NULL) {
- while (git__isspace(value_start[0]))
- value_start++;
-
- if (unescape_line(var_value, &multiline, value_start, 0) < 0)
- goto on_error;
-
- if (multiline) {
- git_buf multi_value = GIT_BUF_INIT;
- git_buf_attach(&multi_value, *var_value, 0);
-
- if (parse_multiline_variable(reader, &multi_value, quote_count) < 0 ||
- git_buf_oom(&multi_value)) {
- git_buf_free(&multi_value);
- goto on_error;
- }
-
- *var_value = git_buf_detach(&multi_value);
- }
- }
-
- git__free(line);
- return 0;
-
-on_error:
- git__free(*var_name);
- git__free(line);
- return -1;
-}
-
-static int config_parse(
- struct reader *reader,
- int (*on_section)(struct reader *reader, const char *current_section, const char *line, size_t line_len, void *data),
- int (*on_variable)(struct reader *reader, const char *current_section, char *var_name, char *var_value, const char *line, size_t line_len, void *data),
- int (*on_comment)(struct reader *reader, const char *line, size_t line_len, void *data),
- int (*on_eof)(struct reader *reader, const char *current_section, void *data),
- void *data)
-{
- char *current_section = NULL, *var_name, *var_value, *line_start;
- char c;
- size_t line_len;
- int result = 0;
-
- skip_bom(reader);
-
- while (result == 0 && !reader->eof) {
- line_start = reader->read_ptr;
-
- c = reader_peek(reader, SKIP_WHITESPACE);
-
- switch (c) {
- case '\0': /* EOF when peeking, set EOF in the reader to exit the loop */
- reader->eof = 1;
- break;
-
- case '[': /* section header, new section begins */
- git__free(current_section);
- current_section = NULL;
-
- if ((result = parse_section_header(reader, &current_section)) == 0 && on_section) {
- line_len = reader->read_ptr - line_start;
- result = on_section(reader, current_section, line_start, line_len, data);
- }
- break;
-
- case '\n': /* comment or whitespace-only */
- case ';':
- case '#':
- reader_consume_line(reader);
-
- if (on_comment) {
- line_len = reader->read_ptr - line_start;
- result = on_comment(reader, line_start, line_len, data);
- }
- break;
-
- default: /* assume variable declaration */
- if ((result = parse_variable(reader, &var_name, &var_value)) == 0 && on_variable) {
- line_len = reader->read_ptr - line_start;
- result = on_variable(reader, current_section, var_name, var_value, line_start, line_len, data);
- }
- break;
- }
- }
-
- if (on_eof)
- result = on_eof(reader, current_section, data);
-
- git__free(current_section);
- return result;
-}
-
struct parse_data {
const git_repository *repo;
const char *file_path;
@@ -1578,7 +918,7 @@ struct parse_data {
int depth;
};
-static int parse_include(struct reader *reader,
+static int parse_include(git_config_parser *reader,
struct parse_data *parse_data, const char *file)
{
struct config_file *include;
@@ -1680,7 +1020,7 @@ static const struct {
{ "gitdir/i:", conditional_match_gitdir_i }
};
-static int parse_conditional_include(struct reader *reader,
+static int parse_conditional_include(git_config_parser *reader,
struct parse_data *parse_data, const char *section, const char *file)
{
char *condition;
@@ -1714,7 +1054,7 @@ static int parse_conditional_include(struct reader *reader,
}
static int read_on_variable(
- struct reader *reader,
+ git_config_parser *reader,
const char *current_section,
char *var_name,
char *var_value,
@@ -1769,12 +1109,12 @@ static int read_on_variable(
static int config_read(
git_strmap *values,
const git_repository *repo,
- struct config_file *file,
+ git_config_file *file,
git_config_level_t level,
int depth)
{
struct parse_data parse_data;
- struct reader reader;
+ git_config_parser reader;
int error;
if (depth >= MAX_INCLUDE_DEPTH) {
@@ -1806,7 +1146,7 @@ static int config_read(
parse_data.level = level;
parse_data.depth = depth;
- error = config_parse(&reader, NULL, read_on_variable, NULL, NULL, &parse_data);
+ error = git_config_parse(&reader, NULL, read_on_variable, NULL, NULL, &parse_data);
out:
git_buf_free(&reader.buffer);
@@ -1909,7 +1249,7 @@ static int write_value(struct write_data *write_data)
}
static int write_on_section(
- struct reader *reader,
+ git_config_parser *reader,
const char *current_section,
const char *line,
size_t line_len,
@@ -1945,7 +1285,7 @@ static int write_on_section(
}
static int write_on_variable(
- struct reader *reader,
+ git_config_parser *reader,
const char *current_section,
char *var_name,
char *var_value,
@@ -1995,7 +1335,7 @@ static int write_on_variable(
return write_value(write_data);
}
-static int write_on_comment(struct reader *reader, const char *line, size_t line_len, void *data)
+static int write_on_comment(git_config_parser *reader, const char *line, size_t line_len, void *data)
{
struct write_data *write_data;
@@ -2006,7 +1346,7 @@ static int write_on_comment(struct reader *reader, const char *line, size_t line
}
static int write_on_eof(
- struct reader *reader, const char *current_section, void *data)
+ git_config_parser *reader, const char *current_section, void *data)
{
struct write_data *write_data = (struct write_data *)data;
int result = 0;
@@ -2045,7 +1385,7 @@ static int config_write(diskfile_backend *cfg, const char *orig_key, const char
char *orig_section, *section, *orig_name, *name, *ldot;
git_filebuf file = GIT_FILEBUF_INIT;
git_buf buf = GIT_BUF_INIT;
- struct reader reader;
+ git_config_parser reader;
struct write_data write_data;
memset(&reader, 0, sizeof(reader));
@@ -2100,7 +1440,12 @@ static int config_write(diskfile_backend *cfg, const char *orig_key, const char
write_data.preg = preg;
write_data.value = value;
- result = config_parse(&reader, write_on_section, write_on_variable, write_on_comment, write_on_eof, &write_data);
+ result = git_config_parse(&reader,
+ write_on_section,
+ write_on_variable,
+ write_on_comment,
+ write_on_eof,
+ &write_data);
git__free(section);
git__free(orig_section);
git_buf_free(&write_data.buffered_comment);
diff --git a/src/config_parse.c b/src/config_parse.c
new file mode 100644
index 000000000..9d0ee797c
--- /dev/null
+++ b/src/config_parse.c
@@ -0,0 +1,658 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+
+#include "config_parse.h"
+
+#include "buf_text.h"
+
+#include <ctype.h>
+
+static void set_parse_error(git_config_parser *reader, int col, const char *error_str)
+{
+ giterr_set(GITERR_CONFIG, "failed to parse config file: %s (in %s:%d, column %d)",
+ error_str, reader->file->path, reader->line_number, col);
+}
+
+static int reader_getchar_raw(git_config_parser *reader)
+{
+ int c;
+
+ c = *reader->read_ptr++;
+
+ /*
+ Win 32 line breaks: if we find a \r\n sequence,
+ return only the \n as a newline
+ */
+ if (c == '\r' && *reader->read_ptr == '\n') {
+ reader->read_ptr++;
+ c = '\n';
+ }
+
+ if (c == '\n')
+ reader->line_number++;
+
+ if (c == 0) {
+ reader->eof = 1;
+ c = '\0';
+ }
+
+ return c;
+}
+
+#define SKIP_WHITESPACE (1 << 1)
+#define SKIP_COMMENTS (1 << 2)
+
+static int reader_getchar(git_config_parser *reader, int flags)
+{
+ const int skip_whitespace = (flags & SKIP_WHITESPACE);
+ const int skip_comments = (flags & SKIP_COMMENTS);
+ int c;
+
+ assert(reader->read_ptr);
+
+ do {
+ c = reader_getchar_raw(reader);
+ } while (c != '\n' && c != '\0' && skip_whitespace && git__isspace(c));
+
+ if (skip_comments && (c == '#' || c == ';')) {
+ do {
+ c = reader_getchar_raw(reader);
+ } while (c != '\n' && c != '\0');
+ }
+
+ return c;
+}
+
+/*
+ * Read the next char, but don't move the reading pointer.
+ */
+static int reader_peek(git_config_parser *reader, int flags)
+{
+ void *old_read_ptr;
+ int old_lineno, old_eof;
+ int ret;
+
+ assert(reader->read_ptr);
+
+ old_read_ptr = reader->read_ptr;
+ old_lineno = reader->line_number;
+ old_eof = reader->eof;
+
+ ret = reader_getchar(reader, flags);
+
+ reader->read_ptr = old_read_ptr;
+ reader->line_number = old_lineno;
+ reader->eof = old_eof;
+
+ return ret;
+}
+
+/*
+ * Read and consume a line, returning it in newly-allocated memory.
+ */
+static char *reader_readline(git_config_parser *reader, bool skip_whitespace)
+{
+ char *line = NULL;
+ char *line_src, *line_end;
+ size_t line_len, alloc_len;
+
+ line_src = reader->read_ptr;
+
+ if (skip_whitespace) {
+ /* Skip empty lines */
+ while (git__isspace(*line_src))
+ ++line_src;
+ }
+
+ line_end = strchr(line_src, '\n');
+
+ /* no newline at EOF */
+ if (line_end == NULL)
+ line_end = strchr(line_src, 0);
+
+ line_len = line_end - line_src;
+
+ if (GIT_ADD_SIZET_OVERFLOW(&alloc_len, line_len, 1) ||
+ (line = git__malloc(alloc_len)) == NULL) {
+ return NULL;
+ }
+
+ memcpy(line, line_src, line_len);
+
+ do line[line_len] = '\0';
+ while (line_len-- > 0 && git__isspace(line[line_len]));
+
+ if (*line_end == '\n')
+ line_end++;
+
+ if (*line_end == '\0')
+ reader->eof = 1;
+
+ reader->line_number++;
+ reader->read_ptr = line_end;
+
+ return line;
+}
+
+/*
+ * Consume a line, without storing it anywhere
+ */
+static void reader_consume_line(git_config_parser *reader)
+{
+ char *line_start, *line_end;
+
+ line_start = reader->read_ptr;
+ line_end = strchr(line_start, '\n');
+ /* No newline at EOF */
+ if(line_end == NULL){
+ line_end = strchr(line_start, '\0');
+ }
+
+ if (*line_end == '\n')
+ line_end++;
+
+ if (*line_end == '\0')
+ reader->eof = 1;
+
+ reader->line_number++;
+ reader->read_ptr = line_end;
+}
+
+GIT_INLINE(int) config_keychar(int c)
+{
+ return isalnum(c) || c == '-';
+}
+
+static int strip_comments(char *line, int in_quotes)
+{
+ int quote_count = in_quotes, backslash_count = 0;
+ char *ptr;
+
+ for (ptr = line; *ptr; ++ptr) {
+ if (ptr[0] == '"' && ptr > line && ptr[-1] != '\\')
+ quote_count++;
+
+ if ((ptr[0] == ';' || ptr[0] == '#') &&
+ (quote_count % 2) == 0 &&
+ (backslash_count % 2) == 0) {
+ ptr[0] = '\0';
+ break;
+ }
+
+ if (ptr[0] == '\\')
+ backslash_count++;
+ else
+ backslash_count = 0;
+ }
+
+ /* skip any space at the end */
+ while (ptr > line && git__isspace(ptr[-1])) {
+ ptr--;
+ }
+ ptr[0] = '\0';
+
+ return quote_count;
+}
+
+
+static int parse_section_header_ext(git_config_parser *reader, const char *line, const char *base_name, char **section_name)
+{
+ int c, rpos;
+ char *first_quote, *last_quote;
+ git_buf buf = GIT_BUF_INIT;
+ size_t quoted_len, alloc_len, base_name_len = strlen(base_name);
+
+ /*
+ * base_name is what came before the space. We should be at the
+ * first quotation mark, except for now, line isn't being kept in
+ * sync so we only really use it to calculate the length.
+ */
+
+ first_quote = strchr(line, '"');
+ if (first_quote == NULL) {
+ set_parse_error(reader, 0, "Missing quotation marks in section header");
+ goto end_error;
+ }
+
+ last_quote = strrchr(line, '"');
+ quoted_len = last_quote - first_quote;
+
+ if (quoted_len == 0) {
+ set_parse_error(reader, 0, "Missing closing quotation mark in section header");
+ goto end_error;
+ }
+
+ GITERR_CHECK_ALLOC_ADD(&alloc_len, base_name_len, quoted_len);
+ GITERR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 2);
+
+ if (git_buf_grow(&buf, alloc_len) < 0 ||
+ git_buf_printf(&buf, "%s.", base_name) < 0)
+ goto end_error;
+
+ rpos = 0;
+
+ line = first_quote;
+ c = line[++rpos];
+
+ /*
+ * At the end of each iteration, whatever is stored in c will be
+ * added to the string. In case of error, jump to end_error
+ */
+ do {
+
+ switch (c) {
+ case 0:
+ set_parse_error(reader, 0, "Unexpected end-of-line in section header");
+ goto end_error;
+
+ case '"':
+ goto end_parse;
+
+ case '\\':
+ c = line[++rpos];
+
+ if (c == 0) {
+ set_parse_error(reader, rpos, "Unexpected end-of-line in section header");
+ goto end_error;
+ }
+
+ default:
+ break;
+ }
+
+ git_buf_putc(&buf, (char)c);
+ c = line[++rpos];
+ } while (line + rpos < last_quote);
+
+end_parse:
+ if (git_buf_oom(&buf))
+ goto end_error;
+
+ if (line[rpos] != '"' || line[rpos + 1] != ']') {
+ set_parse_error(reader, rpos, "Unexpected text after closing quotes");
+ git_buf_free(&buf);
+ return -1;
+ }
+
+ *section_name = git_buf_detach(&buf);
+ return 0;
+
+end_error:
+ git_buf_free(&buf);
+
+ return -1;
+}
+
+static int parse_section_header(git_config_parser *reader, char **section_out)
+{
+ char *name, *name_end;
+ int name_length, c, pos;
+ int result;
+ char *line;
+ size_t line_len;
+
+ line = reader_readline(reader, true);
+ if (line == NULL)
+ return -1;
+
+ /* find the closing ']' that ends the section header */
+ name_end = strrchr(line, ']');
+ if (name_end == NULL) {
+ git__free(line);
+ set_parse_error(reader, 0, "Missing ']' in section header");
+ return -1;
+ }
+
+ GITERR_CHECK_ALLOC_ADD(&line_len, (size_t)(name_end - line), 1);
+ name = git__malloc(line_len);
+ GITERR_CHECK_ALLOC(name);
+
+ name_length = 0;
+ pos = 0;
+
+ /* Make sure we were given a section header */
+ c = line[pos++];
+ assert(c == '[');
+
+ c = line[pos++];
+
+ do {
+ if (git__isspace(c)){
+ name[name_length] = '\0';
+ result = parse_section_header_ext(reader, line, name, section_out);
+ git__free(line);
+ git__free(name);
+ return result;
+ }
+
+ if (!config_keychar(c) && c != '.') {
+ set_parse_error(reader, pos, "Unexpected character in header");
+ goto fail_parse;
+ }
+
+ name[name_length++] = (char)git__tolower(c);
+
+ } while ((c = line[pos++]) != ']');
+
+ if (line[pos - 1] != ']') {
+ set_parse_error(reader, pos, "Unexpected end of file");
+ goto fail_parse;
+ }
+
+ git__free(line);
+
+ name[name_length] = 0;
+ *section_out = name;
+
+ return 0;
+
+fail_parse:
+ git__free(line);
+ git__free(name);
+ return -1;
+}
+
+static int skip_bom(git_config_parser *reader)
+{
+ git_bom_t bom;
+ int bom_offset = git_buf_text_detect_bom(&bom,
+ &reader->buffer, reader->read_ptr - reader->buffer.ptr);
+
+ if (bom == GIT_BOM_UTF8)
+ reader->read_ptr += bom_offset;
+
+ /* TODO: reference implementation is pretty stupid with BoM */
+
+ return 0;
+}
+
+/*
+ (* basic types *)
+ digit = "0".."9"
+ integer = digit { digit }
+ alphabet = "a".."z" + "A" .. "Z"
+
+ section_char = alphabet | "." | "-"
+ extension_char = (* any character except newline *)
+ any_char = (* any character *)
+ variable_char = "alphabet" | "-"
+
+
+ (* actual grammar *)
+ config = { section }
+
+ section = header { definition }
+
+ header = "[" section [subsection | subsection_ext] "]"
+
+ subsection = "." section
+ subsection_ext = "\"" extension "\""
+
+ section = section_char { section_char }
+ extension = extension_char { extension_char }
+
+ definition = variable_name ["=" variable_value] "\n"
+
+ variable_name = variable_char { variable_char }
+ variable_value = string | boolean | integer
+
+ string = quoted_string | plain_string
+ quoted_string = "\"" plain_string "\""
+ plain_string = { any_char }
+
+ boolean = boolean_true | boolean_false
+ boolean_true = "yes" | "1" | "true" | "on"
+ boolean_false = "no" | "0" | "false" | "off"
+*/
+
+/* '\"' -> '"' etc */
+static int unescape_line(
+ char **out, bool *is_multi, const char *ptr, int quote_count)
+{
+ char *str, *fixed, *esc;
+ size_t ptr_len = strlen(ptr), alloc_len;
+
+ *is_multi = false;
+
+ if (GIT_ADD_SIZET_OVERFLOW(&alloc_len, ptr_len, 1) ||
+ (str = git__malloc(alloc_len)) == NULL) {
+ return -1;
+ }
+
+ fixed = str;
+
+ while (*ptr != '\0') {
+ if (*ptr == '"') {
+ quote_count++;
+ } else if (*ptr != '\\') {
+ *fixed++ = *ptr;
+ } else {
+ /* backslash, check the next char */
+ ptr++;
+ /* if we're at the end, it's a multiline, so keep the backslash */
+ if (*ptr == '\0') {
+ *is_multi = true;
+ goto done;
+ }
+ if ((esc = strchr(git_config_escapes, *ptr)) != NULL) {
+ *fixed++ = git_config_escaped[esc - git_config_escapes];
+ } else {
+ git__free(str);
+ giterr_set(GITERR_CONFIG, "invalid escape at %s", ptr);
+ return -1;
+ }
+ }
+ ptr++;
+ }
+
+done:
+ *fixed = '\0';
+ *out = str;
+
+ return 0;
+}
+
+ /*
+  * Append the continuation lines of a multi-line value to `value`.
+  *
+  * Called after a line ended in a backslash. Lines are read one at a time:
+  * each is stripped of comments, unescaped and appended to `value`, and
+  * reading continues for as long as the line just handled ends in a
+  * backslash itself. Lines that are empty once comments are stripped are
+  * skipped. Reaching the end of the input is not an error.
+  *
+  * `in_quotes` is the quote state carried over from the previous line; it is
+  * replaced by the result of strip_comments() after every line.
+  *
+  * This is a loop rather than a recursion so that a file made of many
+  * continuation or comment lines cannot exhaust the stack.
+  *
+  * Returns 0 on success, -1 if a line could not be read, unescaped or
+  * appended to the buffer.
+  */
+ static int parse_multiline_variable(git_config_parser *reader, git_buf *value, int in_quotes)
+ {
+ 	for (;;) {
+ 		char *line, *proc_line = NULL;
+ 		int quote_count;
+ 		bool multiline;
+
+ 		/* Check that the next line exists */
+ 		line = reader_readline(reader, false);
+ 		if (line == NULL)
+ 			return -1;
+
+ 		/* We've reached the end of the file, there is no continuation.
+ 		 * (this is not an error).
+ 		 */
+ 		if (line[0] == '\0') {
+ 			git__free(line);
+ 			return 0;
+ 		}
+
+ 		quote_count = strip_comments(line, !!in_quotes);
+
+ 		/* If it was just a comment, pretend it didn't exist */
+ 		if (line[0] == '\0') {
+ 			git__free(line);
+ 			in_quotes = quote_count;
+ 			continue;
+ 		}
+
+ 		if (unescape_line(&proc_line, &multiline, line, in_quotes) < 0) {
+ 			git__free(line);
+ 			return -1;
+ 		}
+
+ 		/* add this line to the multiline var */
+ 		if (git_buf_puts(value, proc_line) < 0) {
+ 			git__free(line);
+ 			git__free(proc_line);
+ 			return -1;
+ 		}
+
+ 		git__free(line);
+ 		git__free(proc_line);
+
+ 		/* No trailing backslash: the value is complete */
+ 		if (!multiline)
+ 			return 0;
+
+ 		/* Otherwise keep putting stuff in the buffer */
+ 		in_quotes = quote_count;
+ 	}
+ }
+
+ /*
+  * Tell whether `c` may appear in a variable name: an alphanumeric
+  * character or '-'.
+  *
+  * The argument is converted to unsigned char before being handed to
+  * isalnum(), because passing a negative value (a byte >= 0x80 where char is
+  * signed) to the <ctype.h> functions is undefined behaviour.
+  */
+ GIT_INLINE(bool) is_namechar(char c)
+ {
+ 	return isalnum((unsigned char)c) || c == '-';
+ }
+
+ /*
+  * Split a "name [= value]" line into its variable name and value part.
+  *
+  * The name is the leading run of is_namechar() characters and must not be
+  * empty. After optional whitespace the line must either end or continue
+  * with '='; anything else is reported as an invalid key.
+  *
+  * On success returns 0, stores a newly allocated copy of the name in
+  * `*name` (to be released with git__free()) and sets `*value` to the
+  * character right after '=' inside `line`, or to NULL when the line has no
+  * '='. On failure returns -1 with `*name` set to NULL.
+  */
+ static int parse_name(
+ 	char **name, const char **value, git_config_parser *reader, const char *line)
+ {
+ 	const char *key_end, *cursor;
+
+ 	*name = NULL;
+ 	*value = NULL;
+
+ 	/* Find the end of the key */
+ 	for (key_end = line; *key_end && is_namechar(*key_end); key_end++)
+ 		;
+
+ 	if (key_end == line) {
+ 		set_parse_error(reader, 0, "Invalid configuration key");
+ 		return -1;
+ 	}
+
+ 	/* Skip the whitespace between the key and whatever follows it */
+ 	for (cursor = key_end; *cursor && git__isspace(*cursor); cursor++)
+ 		;
+
+ 	if (*cursor == '=') {
+ 		*value = cursor + 1;
+ 	} else if (*cursor != '\0') {
+ 		set_parse_error(reader, 0, "Invalid configuration key");
+ 		return -1;
+ 	}
+
+ 	*name = git__strndup(line, key_end - line);
+
+ 	return (*name == NULL) ? -1 : 0;
+ }
+
+ /*
+  * Read one variable definition from the parser.
+  *
+  * The next line is read, stripped of comments and split into a name and an
+  * optional value. The value is unescaped and, when the line ends in a
+  * backslash, extended with the following continuation lines.
+  *
+  * On success returns 0; `*var_name` holds a newly allocated name and
+  * `*var_value` a newly allocated value, or NULL when the line has no '='
+  * (boolean true is assumed). Both must be released with git__free().
+  *
+  * On failure returns -1 and both outputs are set to NULL, so that callers
+  * never see a pointer to memory that has already been released.
+  */
+ static int parse_variable(git_config_parser *reader, char **var_name, char **var_value)
+ {
+ 	const char *value_start = NULL;
+ 	char *line;
+ 	int quote_count;
+ 	bool multiline;
+
+ 	/* If there is no value, boolean true is assumed */
+ 	*var_name = NULL;
+ 	*var_value = NULL;
+
+ 	line = reader_readline(reader, true);
+ 	if (line == NULL)
+ 		return -1;
+
+ 	quote_count = strip_comments(line, 0);
+
+ 	if (parse_name(var_name, &value_start, reader, line) < 0)
+ 		goto on_error;
+
+ 	/*
+ 	 * Now, let's try to parse the value
+ 	 */
+ 	if (value_start != NULL) {
+ 		while (git__isspace(value_start[0]))
+ 			value_start++;
+
+ 		if (unescape_line(var_value, &multiline, value_start, 0) < 0)
+ 			goto on_error;
+
+ 		if (multiline) {
+ 			git_buf multi_value = GIT_BUF_INIT;
+ 			git_buf_attach(&multi_value, *var_value, 0);
+
+ 			/* The buffer owns the string until it is detached again */
+ 			*var_value = NULL;
+
+ 			if (parse_multiline_variable(reader, &multi_value, quote_count) < 0 ||
+ 			    git_buf_oom(&multi_value)) {
+ 				git_buf_free(&multi_value);
+ 				goto on_error;
+ 			}
+
+ 			*var_value = git_buf_detach(&multi_value);
+ 		}
+ 	}
+
+ 	git__free(line);
+ 	return 0;
+
+ on_error:
+ 	git__free(*var_name);
+ 	*var_name = NULL;
+ 	git__free(line);
+ 	return -1;
+ }
+
+ /*
+  * Parse the contents of `parser`'s buffer, reporting what is found through
+  * the given callbacks. Every callback may be NULL, in which case the
+  * corresponding element is parsed but not reported.
+  *
+  * - on_section:  called after a "[section]" header has been parsed, with
+  *                the new section name.
+  * - on_variable: called after a variable definition has been parsed.
+  *                `var_name` and `var_value` are heap-allocated and are not
+  *                freed here once they have been handed to the callback.
+  *                When no callback is given they are released here instead.
+  * - on_comment:  called for comment lines and whitespace-only lines.
+  * - on_eof:      called once the input has been parsed without error, with
+  *                the name of the last section seen (may be NULL).
+  *
+  * `line` / `line_len` passed to the callbacks describe the raw bytes that
+  * were consumed for the element, as a range inside the parser's buffer.
+  *
+  * Parsing stops as soon as a parse step or a callback returns non-zero.
+  * A negative result is returned as-is and on_eof is not invoked, so that it
+  * cannot replace the error with its own return value.
+  *
+  * Returns 0 on success, otherwise the non-zero status that ended parsing
+  * or the status returned by on_eof.
+  */
+ int git_config_parse(
+ 	git_config_parser *parser,
+ 	git_config_parser_section_cb on_section,
+ 	git_config_parser_variable_cb on_variable,
+ 	git_config_parser_comment_cb on_comment,
+ 	git_config_parser_eof_cb on_eof,
+ 	void *data)
+ {
+ 	char *current_section = NULL, *var_name = NULL, *var_value = NULL, *line_start;
+ 	char c;
+ 	size_t line_len;
+ 	int result = 0;
+
+ 	skip_bom(parser);
+
+ 	while (result == 0 && !parser->eof) {
+ 		line_start = parser->read_ptr;
+
+ 		c = reader_peek(parser, SKIP_WHITESPACE);
+
+ 		switch (c) {
+ 		case '\0': /* EOF when peeking, set EOF in the parser to exit the loop */
+ 			parser->eof = 1;
+ 			break;
+
+ 		case '[': /* section header, new section begins */
+ 			git__free(current_section);
+ 			current_section = NULL;
+
+ 			if ((result = parse_section_header(parser, &current_section)) == 0 && on_section) {
+ 				line_len = parser->read_ptr - line_start;
+ 				result = on_section(parser, current_section, line_start, line_len, data);
+ 			}
+ 			break;
+
+ 		case '\n': /* comment or whitespace-only */
+ 		case ';':
+ 		case '#':
+ 			reader_consume_line(parser);
+
+ 			if (on_comment) {
+ 				line_len = parser->read_ptr - line_start;
+ 				result = on_comment(parser, line_start, line_len, data);
+ 			}
+ 			break;
+
+ 		default: /* assume variable declaration */
+ 			if ((result = parse_variable(parser, &var_name, &var_value)) != 0)
+ 				break;
+
+ 			if (on_variable) {
+ 				line_len = parser->read_ptr - line_start;
+ 				result = on_variable(parser, current_section, var_name, var_value, line_start, line_len, data);
+ 			} else {
+ 				/* Nobody takes the strings over, so release them here */
+ 				git__free(var_name);
+ 				git__free(var_value);
+ 			}
+
+ 			var_name = NULL;
+ 			var_value = NULL;
+ 			break;
+ 		}
+ 	}
+
+ 	/* Do not let on_eof overwrite an error from parsing or a callback */
+ 	if (result >= 0 && on_eof)
+ 		result = on_eof(parser, current_section, data);
+
+ 	git__free(current_section);
+ 	return result;
+ }
diff --git a/src/config_parse.h b/src/config_parse.h
new file mode 100644
index 000000000..a3d53d2ae
--- /dev/null
+++ b/src/config_parse.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+
+ #ifndef INCLUDE_config_parse_h__
+ #define INCLUDE_config_parse_h__
+
+ #include "common.h"
+ #include "array.h"
+ #include "oid.h"
+
+ /*
+  * Parallel tables describing the supported backslash escapes: a backslash
+  * followed by git_config_escapes[i] stands for git_config_escaped[i].
+  */
+ static const char *git_config_escapes = "ntb\"\\";
+ static const char *git_config_escaped = "\n\t\b\"\\";
+
+ /*
+  * A configuration file together with the files it includes.
+  * NOTE(review): `checksum` presumably identifies the contents last read
+  * from `path` -- confirm against the code in config_file.c.
+  */
+ typedef struct config_file {
+ 	git_oid checksum;
+ 	char *path;
+ 	git_array_t(struct config_file) includes;
+ } git_config_file;
+
+ /* State of a parse run over the contents of one configuration file. */
+ typedef struct {
+ 	struct config_file *file;
+ 	git_buf buffer;    /* the text being parsed */
+ 	char *read_ptr;    /* current position inside buffer.ptr */
+ 	int line_number;
+ 	int eof;           /* set to 1 by git_config_parse() at end of input */
+ } git_config_parser;
+
+ /*
+  * Called after a section header has been parsed. `current_section` is the
+  * name of the section that just started. `line` points into the parser's
+  * buffer at the start of the header and `line_len` is the number of bytes
+  * consumed for it; the range is not NUL-terminated on its own.
+  * A non-zero return value stops parsing.
+  */
+ typedef int (*git_config_parser_section_cb)(
+ 	git_config_parser *parser,
+ 	const char *current_section,
+ 	const char *line,
+ 	size_t line_len,
+ 	void *data);
+
+ /*
+  * Called after a variable definition has been parsed. `current_section` is
+  * NULL when no section header has been seen yet. `var_name` and `var_value`
+  * are heap-allocated; `var_value` is NULL when the variable has no "=".
+  * git_config_parse() does not free them after invoking the callback.
+  * NOTE(review): this suggests the callback takes ownership -- confirm
+  * against the callbacks in config_file.c.
+  * `line` / `line_len` describe the consumed bytes as for the section
+  * callback. A non-zero return value stops parsing.
+  */
+ typedef int (*git_config_parser_variable_cb)(
+ 	git_config_parser *parser,
+ 	const char *current_section,
+ 	char *var_name,
+ 	char *var_value,
+ 	const char *line,
+ 	size_t line_len,
+ 	void *data);
+
+ /*
+  * Called for comment lines (starting with ';' or '#') and for
+  * whitespace-only lines. `line` / `line_len` describe the consumed bytes.
+  * A non-zero return value stops parsing.
+  */
+ typedef int (*git_config_parser_comment_cb)(
+ 	git_config_parser *parser,
+ 	const char *line,
+ 	size_t line_len,
+ 	void *data);
+
+ /*
+  * Called once parsing has reached the end of the input. `current_section`
+  * is the last section seen, or NULL if there was none.
+  */
+ typedef int (*git_config_parser_eof_cb)(
+ 	git_config_parser *parser,
+ 	const char *current_section,
+ 	void *data);
+
+ /*
+  * Parse the buffer held by `parser` and report sections, variables,
+  * comments and the end of input through the given callbacks. Each callback
+  * may be NULL. `data` is passed through to every callback unchanged.
+  *
+  * Returns 0 on success, otherwise a non-zero status from parsing or from a
+  * callback.
+  */
+ int git_config_parse(
+ 	git_config_parser *parser,
+ 	git_config_parser_section_cb on_section,
+ 	git_config_parser_variable_cb on_variable,
+ 	git_config_parser_comment_cb on_comment,
+ 	git_config_parser_eof_cb on_eof,
+ 	void *data);
+
+ #endif