summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJo-Philipp Wich <jo@mein.io>2018-02-04 18:36:07 +0100
committerJo-Philipp Wich <jo@mein.io>2018-02-04 18:45:21 +0100
commitc7e938d6582a436dddc938539e72dd1320625c54 (patch)
treeae5ed1e3ee150ea8936433fed04f6a2dd03ac2d0
parentcd6629fc75787cbfcbec12282bd738373bc46ac6 (diff)
downloadjsonpath-c7e938d6582a436dddc938539e72dd1320625c54.tar.gz
implement POSIX regexp support
Introduce a new operator `~` and new `/.../eis` regular expression syntax.

This allows filtering by regular expression, e.g.

    jsonfilter -s '[ "foo", "bar", "baz" ]' -e '$[@ ~ /^b/]'

... would yield the values `bar` and `baz`.

Possible regular expression modifiers are:

 - `e` ... enable extended POSIX regular expressions
 - `i` ... perform case-insensitive matches
 - `s` ... let ranges and `.` match the newline character

A regular expression literal may occur on the left or the right side of the `~` operator, but not on both.

In case neither side of the `~` operator is a regular expression, the right side will be treated as a regular expression pattern. Non-string values are converted to their string representation before performing matching.

Signed-off-by: Jo-Philipp Wich <jo@mein.io>
-rw-r--r--lexer.c75
-rw-r--r--lexer.h2
-rw-r--r--matcher.c97
-rw-r--r--matcher.h2
-rw-r--r--parser.y4
5 files changed, 176 insertions, 4 deletions
diff --git a/lexer.c b/lexer.c
index ca5880e..c016d41 100644
--- a/lexer.c
+++ b/lexer.c
@@ -18,6 +18,7 @@
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
+#include <regex.h>
#include "ast.h"
#include "lexer.h"
@@ -236,7 +237,21 @@ parse_string(const char *buf, struct jp_opcode *op, struct jp_state *s)
case 'r': *out = '\r'; break;
case 't': *out = '\t'; break;
case 'v': *out = '\v'; break;
- default: *out = *in; break;
+ default:
+ /* in regexp mode, retain backslash */
+ if (q == '/')
+ {
+ if (rem-- < 1)
+ {
+ s->error_pos = s->off + (in - buf);
+ return -3;
+ }
+
+ *out++ = '\\';
+ }
+
+ *out = *in;
+ break;
}
in++;
@@ -278,6 +293,58 @@ parse_string(const char *buf, struct jp_opcode *op, struct jp_state *s)
/*
+ * Parses a regexp literal from the given buffer.
+ *
+ * Returns a negative value on error, otherwise the amount of consumed
+ * characters from the given buffer.
+ *
+ * The literal itself is parsed by parse_string(). When that call consumed
+ * at least two characters, op->num is initialised to REG_NOSUB | REG_NEWLINE
+ * and any directly following modifier characters are consumed as well and
+ * folded into op->num: `e` sets REG_EXTENDED, `i` sets REG_ICASE and `s`
+ * clears REG_NEWLINE. Consumed modifiers are included in the return value.
+ *
+ * Error values:
+ * -1 Unterminated regexp
+ * -2 Invalid escape sequence
+ * -3 Regexp literal too long
+ */
+
+static int
+parse_regexp(const char *buf, struct jp_opcode *op, struct jp_state *s)
+{
+ int len = parse_string(buf, op, s); /* result (including negative values) is passed through unchanged unless modifiers follow */
+ const char *p;
+
+ if (len >= 2) /* presumably: both delimiters were consumed - TODO confirm against parse_string() */
+ {
+ op->num = REG_NOSUB | REG_NEWLINE; /* default regcomp() flags for a literal without modifiers */
+
+ for (p = buf + len; p; p++) /* `p` is never NULL here; the loop only ends via the default case below */
+ {
+ switch (*p)
+ {
+ case 'e':
+ op->num |= REG_EXTENDED;
+ len++; /* count the modifier as consumed input */
+ break;
+
+ case 'i':
+ op->num |= REG_ICASE;
+ len++;
+ break;
+
+ case 's':
+ op->num &= ~REG_NEWLINE; /* drop the default newline-sensitive matching */
+ len++;
+ break;
+
+ default: /* first character that is not a modifier (including the terminating NUL) ends the literal */
+ return len;
+ }
+ }
+
+ }
+
+ return len;
+}
+
+
+/*
* Parses a label from the given buffer.
*
* Returns a negative value on error, otherwise the amount of consumed
@@ -367,8 +434,10 @@ static const struct token tokens[] = {
{ T_LT, "<", 1 },
{ T_GT, ">", 1 },
{ T_EQ, "=", 1 },
+ { T_MATCH, "~", 1 },
{ T_NOT, "!", 1 },
{ T_WILDCARD, "*", 1 },
+ { T_REGEXP, "/", 1, parse_regexp },
{ T_STRING, "'", 1, parse_string },
{ T_STRING, "\"", 1, parse_string },
{ T_LABEL, "_", 1, parse_label },
@@ -378,7 +447,7 @@ static const struct token tokens[] = {
{ T_NUMBER, "09", 0, parse_number },
};
-const char *tokennames[23] = {
+const char *tokennames[25] = {
[0] = "End of file",
[T_AND] = "'&&'",
[T_OR] = "'||'",
@@ -389,12 +458,14 @@ const char *tokennames[23] = {
[T_GE] = "'>='",
[T_LT] = "'<'",
[T_LE] = "'<='",
+ [T_MATCH] = "'~'",
[T_NOT] = "'!'",
[T_LABEL] = "Label",
[T_ROOT] = "'$'",
[T_THIS] = "'@'",
[T_DOT] = "'.'",
[T_WILDCARD] = "'*'",
+ [T_REGEXP] = "/.../",
[T_BROPEN] = "'['",
[T_BRCLOSE] = "']'",
[T_BOOL] = "Bool",
diff --git a/lexer.h b/lexer.h
index 0906f76..a47c154 100644
--- a/lexer.h
+++ b/lexer.h
@@ -19,7 +19,7 @@
#include "ast.h"
-extern const char *tokennames[23];
+extern const char *tokennames[25];
struct jp_opcode *
jp_get_token(struct jp_state *s, const char *input, int *mlen);
diff --git a/matcher.c b/matcher.c
index 85bd1c5..d2a8767 100644
--- a/matcher.c
+++ b/matcher.c
@@ -17,6 +17,7 @@
#include "parser.h"
#include "matcher.h"
+
static struct json_object *
jp_match_next(struct jp_opcode *ptr,
struct json_object *root, struct json_object *cur,
@@ -131,6 +132,99 @@ jp_cmp(struct jp_opcode *op, struct json_object *root, struct json_object *cur)
}
static bool
+jp_regmatch(struct jp_opcode *op, struct json_object *root, struct json_object *cur)
+{ /*
+ * Evaluates a `~` (T_MATCH) node: resolves both child operands, picks one
+ * as the subject string and the other as the POSIX pattern, and returns
+ * true when regexec() reports a match.
+ *
+ * - If the left operand is a T_REGEXP, it supplies pattern and flags and
+ *   the right operand is the subject.
+ * - Otherwise the left operand is the subject and the right operand is
+ *   the pattern; a non-regexp right operand is compiled with the default
+ *   flags REG_NOSUB | REG_NEWLINE.
+ *
+ * Bool operands are matched as "true"/"false", numbers as their "%d"
+ * rendering, strings as-is. Any other operand type, a T_REGEXP subject,
+ * a failed jp_resolve() or a failed regcomp() yields false.
+ */
+ struct jp_opcode left, right;
+ char lbuf[22], rbuf[22], *lval, *rval; /* lbuf/rbuf: scratch space for numbers rendered as text */
+ int err, rflags = REG_NOSUB | REG_NEWLINE; /* flags used when the pattern is not a regexp literal */
+ regex_t preg;
+
+
+ if (!jp_resolve(root, cur, op->down, &left) ||
+ !jp_resolve(root, cur, op->down->sibling, &right))
+ return false;
+
+ if (left.type == T_REGEXP) /* regexp literal on the left: the right operand becomes the subject */
+ {
+ switch (right.type)
+ {
+ case T_BOOL:
+ lval = right.num ? "true" : "false";
+ break;
+
+ case T_NUMBER:
+ snprintf(lbuf, sizeof(lbuf), "%d", right.num); /* assumes .num is an int - TODO confirm against struct jp_opcode */
+ lval = lbuf;
+ break;
+
+ case T_STRING:
+ lval = right.str;
+ break;
+
+ default: /* also reached when both sides are regexp literals */
+ return false;
+ }
+
+ rval = left.str;
+ rflags = left.num; /* flags stored with the regexp literal */
+ }
+ else /* left operand is the subject, right operand is the pattern */
+ {
+ switch (left.type)
+ {
+ case T_BOOL:
+ lval = left.num ? "true" : "false";
+ break;
+
+ case T_NUMBER:
+ snprintf(lbuf, sizeof(lbuf), "%d", left.num);
+ lval = lbuf;
+ break;
+
+ case T_STRING:
+ lval = left.str;
+ break;
+
+ default:
+ return false;
+ }
+
+ switch (right.type)
+ {
+ case T_BOOL:
+ rval = right.num ? "true" : "false";
+ break;
+
+ case T_NUMBER:
+ snprintf(rbuf, sizeof(rbuf), "%d", right.num);
+ rval = rbuf;
+ break;
+
+ case T_STRING:
+ rval = right.str; /* plain string used as pattern with the default flags */
+ break;
+
+ case T_REGEXP:
+ rval = right.str;
+ rflags = right.num; /* regexp literal brings its own flags */
+ break;
+
+ default:
+ return false;
+ }
+ }
+
+ if (regcomp(&preg, rval, rflags)) /* non-zero means the pattern failed to compile: treat as no match */
+ return false;
+
+ err = regexec(&preg, lval, 0, NULL, 0); /* no submatch storage is passed (nmatch 0, pmatch NULL) */
+
+ regfree(&preg);
+
+ return err ? false : true; /* regexec() returns 0 on a match */
+}
+
+static bool
jp_expr(struct jp_opcode *op, struct json_object *root, struct json_object *cur,
int idx, const char *key, jp_match_cb_t cb, void *priv)
{
@@ -149,6 +243,9 @@ jp_expr(struct jp_opcode *op, struct json_object *root, struct json_object *cur,
case T_GE:
return jp_cmp(op, root, cur);
+ case T_MATCH:
+ return jp_regmatch(op, root, cur);
+
case T_ROOT:
return !!jp_match(op, root, NULL, NULL);
diff --git a/matcher.h b/matcher.h
index 468ddf2..aac21b9 100644
--- a/matcher.h
+++ b/matcher.h
@@ -19,6 +19,8 @@
#include <string.h>
#include <stdbool.h>
+#include <stdio.h>
+#include <regex.h>
#ifdef JSONC
#include <json.h>
diff --git a/parser.y b/parser.y
index 29b43ba..4d3581e 100644
--- a/parser.y
+++ b/parser.y
@@ -20,7 +20,7 @@
%left T_AND.
%left T_OR.
%left T_UNION.
-%nonassoc T_EQ T_NE T_GT T_GE T_LT T_LE.
+%nonassoc T_EQ T_NE T_GT T_GE T_LT T_LE T_MATCH.
%right T_NOT.
%include {
@@ -87,11 +87,13 @@ cmp_exp(A) ::= unary_exp(B) T_GT unary_exp(C). { A = alloc_op(T_GT, 0, NULL, B,
cmp_exp(A) ::= unary_exp(B) T_GE unary_exp(C). { A = alloc_op(T_GE, 0, NULL, B, C); }
cmp_exp(A) ::= unary_exp(B) T_EQ unary_exp(C). { A = alloc_op(T_EQ, 0, NULL, B, C); }
cmp_exp(A) ::= unary_exp(B) T_NE unary_exp(C). { A = alloc_op(T_NE, 0, NULL, B, C); }
+cmp_exp(A) ::= unary_exp(B) T_MATCH unary_exp(C). { A = alloc_op(T_MATCH, 0, NULL, B, C); }
cmp_exp(A) ::= unary_exp(B). { A = B; }
unary_exp(A) ::= T_BOOL(B). { A = B; }
unary_exp(A) ::= T_NUMBER(B). { A = B; }
unary_exp(A) ::= T_STRING(B). { A = B; }
+unary_exp(A) ::= T_REGEXP(B). { A = B; }
unary_exp(A) ::= T_WILDCARD(B). { A = B; }
unary_exp(A) ::= T_POPEN or_exps(B) T_PCLOSE. { A = B; }
unary_exp(A) ::= T_NOT unary_exp(B). { A = alloc_op(T_NOT, 0, NULL, B); }