summaryrefslogtreecommitdiff
path: root/src/treesit.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/treesit.c')
-rw-r--r--src/treesit.c247
1 files changed, 164 insertions, 83 deletions
diff --git a/src/treesit.c b/src/treesit.c
index 9fa88b48dcc..d168ff02b69 100644
--- a/src/treesit.c
+++ b/src/treesit.c
@@ -2,6 +2,8 @@
Copyright (C) 2021-2022 Free Software Foundation, Inc.
+Maintainer: Yuan Fu <casouri@gmail.com>
+
This file is part of GNU Emacs.
GNU Emacs is free software: you can redistribute it and/or modify
@@ -404,6 +406,10 @@ init_treesit_functions (void)
/*** Initialization */
+/* This is the limit on recursion levels for some tree-sitter
+ functions. Remember to update docstrings when changing this
+ value. */
+const ptrdiff_t treesit_recursion_limit = 1000;
bool treesit_initialized = false;
static bool
@@ -927,11 +933,24 @@ static void
treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree,
Lisp_Object parser)
{
- uint32_t len;
- TSRange *ranges = ts_tree_get_changed_ranges (old_tree, new_tree, &len);
+ /* If the old_tree is NULL, meaning this is the first parse, the
+ changed range is the whole buffer. */
+ Lisp_Object lisp_ranges;
struct buffer *buf = XBUFFER (XTS_PARSER (parser)->buffer);
- Lisp_Object lisp_ranges = treesit_make_ranges (ranges, len, buf);
- xfree (ranges);
+ if (old_tree)
+ {
+ uint32_t len;
+ TSRange *ranges = ts_tree_get_changed_ranges (old_tree, new_tree, &len);
+ lisp_ranges = treesit_make_ranges (ranges, len, buf);
+ xfree (ranges);
+ }
+ else
+ {
+ struct buffer *oldbuf = current_buffer;
+ set_buffer_internal (buf);
+ lisp_ranges = Fcons (Fcons (Fpoint_min (), Fpoint_max ()), Qnil);
+ set_buffer_internal (oldbuf);
+ }
specpdl_ref count = SPECPDL_INDEX ();
@@ -949,6 +968,11 @@ treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree,
static void
treesit_ensure_parsed (Lisp_Object parser)
{
+ /* Make sure this comes before everything else, see comment
+ (ref:notifier-inside-ensure-parsed) for more detail. */
+ if (!XTS_PARSER (parser)->need_reparse)
+ return;
+
struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
/* Before we parse, catch up with the narrowing situation. */
@@ -957,8 +981,6 @@ treesit_ensure_parsed (Lisp_Object parser)
because it might set the flag to true. */
treesit_sync_visible_region (parser);
- if (!XTS_PARSER (parser)->need_reparse)
- return;
TSParser *treesit_parser = XTS_PARSER (parser)->parser;
TSTree *tree = XTS_PARSER (parser)->tree;
TSInput input = XTS_PARSER (parser)->input;
@@ -978,14 +1000,17 @@ treesit_ensure_parsed (Lisp_Object parser)
xsignal1 (Qtreesit_parse_error, buf);
}
- if (tree != NULL)
- {
- treesit_call_after_change_functions (tree, new_tree, parser);
- ts_tree_delete (tree);
- }
-
XTS_PARSER (parser)->tree = new_tree;
XTS_PARSER (parser)->need_reparse = false;
+
+ /* After-change functions should run at the very end, most crucially
+ after need_reparse is set to false, this way if the function
+ calls some tree-sitter function which invokes
+ treesit_ensure_parsed again, it returns early and do not
+ recursively call the after change functions again.
+ (ref:notifier-inside-ensure-parsed) */
+ treesit_call_after_change_functions (tree, new_tree, parser);
+ ts_tree_delete (tree);
}
/* This is the read function provided to tree-sitter to read from a
@@ -1762,7 +1787,7 @@ If NODE is nil, return nil. */)
return build_string (string);
}
-static TSTreeCursor treesit_cursor_helper (TSNode, Lisp_Object);
+static bool treesit_cursor_helper (TSTreeCursor *, TSNode, Lisp_Object);
DEFUN ("treesit-node-parent",
Ftreesit_node_parent, Streesit_node_parent, 1, 1, 0,
@@ -1778,7 +1803,10 @@ Return nil if NODE has no parent. If NODE is nil, return nil. */)
TSNode treesit_node = XTS_NODE (node)->node;
Lisp_Object parser = XTS_NODE (node)->parser;
- TSTreeCursor cursor = treesit_cursor_helper (treesit_node, parser);
+ TSTreeCursor cursor;
+ if (!treesit_cursor_helper (&cursor, treesit_node, parser))
+ return return_value;
+
if (ts_tree_cursor_goto_parent (&cursor))
{
TSNode parent = ts_tree_cursor_current_node (&cursor);
@@ -2042,12 +2070,11 @@ Note that this function returns an immediate child, not the smallest
struct buffer *buf = XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
- ptrdiff_t byte_pos = buf_charpos_to_bytepos (buf, XFIXNUM (pos));
treesit_check_position (pos, buf);
-
treesit_initialize ();
+ ptrdiff_t byte_pos = buf_charpos_to_bytepos (buf, XFIXNUM (pos));
TSNode treesit_node = XTS_NODE (node)->node;
TSNode child;
if (NILP (named))
@@ -2078,14 +2105,14 @@ If NODE is nil, return nil. */)
struct buffer *buf = XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
- ptrdiff_t byte_beg = buf_charpos_to_bytepos (buf, XFIXNUM (beg));
- ptrdiff_t byte_end = buf_charpos_to_bytepos (buf, XFIXNUM (end));
treesit_check_position (beg, buf);
treesit_check_position (end, buf);
treesit_initialize ();
+ ptrdiff_t byte_beg = buf_charpos_to_bytepos (buf, XFIXNUM (beg));
+ ptrdiff_t byte_end = buf_charpos_to_bytepos (buf, XFIXNUM (end));
TSNode treesit_node = XTS_NODE (node)->node;
TSNode child;
if (NILP (named))
@@ -2161,6 +2188,8 @@ See Info node `(elisp)Pattern Matching' for detailed explanation. */)
return build_pure_c_string ("#equal");
if (EQ (pattern, QCmatch))
return build_pure_c_string ("#match");
+ if (EQ (pattern, QCpred))
+ return build_pure_c_string ("#pred");
Lisp_Object opening_delimeter
= build_pure_c_string (VECTORP (pattern) ? "[" : "(");
Lisp_Object closing_delimiter
@@ -2260,10 +2289,10 @@ treesit_predicates_for_pattern (TSQuery *query, uint32_t pattern_index)
return Fnreverse (result);
}
-/* Translate a capture NAME (symbol) to the text of the captured node.
+/* Translate a capture NAME (symbol) to a node.
Signals treesit-query-error if such node is not captured. */
static Lisp_Object
-treesit_predicate_capture_name_to_text (Lisp_Object name,
+treesit_predicate_capture_name_to_node (Lisp_Object name,
struct capture_range captures)
{
Lisp_Object node = Qnil;
@@ -2283,6 +2312,16 @@ treesit_predicate_capture_name_to_text (Lisp_Object name,
name, build_pure_c_string ("A predicate can only refer"
" to captured nodes in the "
"same pattern"));
+ return node;
+}
+
+/* Translate a capture NAME (symbol) to the text of the captured node.
+ Signals treesit-query-error if such node is not captured. */
+static Lisp_Object
+treesit_predicate_capture_name_to_text (Lisp_Object name,
+ struct capture_range captures)
+{
+ Lisp_Object node = treesit_predicate_capture_name_to_node (name, captures);
struct buffer *old_buffer = current_buffer;
set_buffer_internal (XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer));
@@ -2356,13 +2395,30 @@ treesit_predicate_match (Lisp_Object args, struct capture_range captures)
return false;
}
-/* About predicates: I decide to hard-code predicates in C instead of
- implementing an extensible system where predicates are translated
- to Lisp functions, and new predicates can be added by extending a
- list of functions, because I really couldn't imagine any useful
- predicates besides equal and match. If we later found out that
- such system is indeed useful and necessary, it can be easily
- added. */
+/* Handles predicate (#pred FN ARG...). Return true if FN returns
+ non-nil; return false otherwise. The arity of FN must match the
+ number of ARGs */
+static bool
+treesit_predicate_pred (Lisp_Object args, struct capture_range captures)
+{
+ if (XFIXNUM (Flength (args)) < 2)
+ xsignal2 (Qtreesit_query_error,
+ build_pure_c_string ("Predicate `pred' requires "
+ "at least two arguments, "
+ "but was only given"),
+ Flength (args));
+
+ Lisp_Object fn = Fintern (XCAR (args), Qnil);
+ Lisp_Object nodes = Qnil;
+ Lisp_Object tail = XCDR (args);
+ FOR_EACH_TAIL (tail)
+ nodes = Fcons (treesit_predicate_capture_name_to_node (XCAR (tail),
+ captures),
+ nodes);
+ nodes = Fnreverse (nodes);
+
+ return !NILP (CALLN (Fapply, fn, nodes));
+}
/* If all predicates in PREDICATES passes, return true; otherwise
return false. */
@@ -2378,14 +2434,17 @@ treesit_eval_predicates (struct capture_range captures, Lisp_Object predicates)
Lisp_Object fn = XCAR (predicate);
Lisp_Object args = XCDR (predicate);
if (!NILP (Fstring_equal (fn, build_pure_c_string ("equal"))))
- pass = treesit_predicate_equal (args, captures);
+ pass &= treesit_predicate_equal (args, captures);
else if (!NILP (Fstring_equal (fn, build_pure_c_string ("match"))))
- pass = treesit_predicate_match (args, captures);
+ pass &= treesit_predicate_match (args, captures);
+ else if (!NILP (Fstring_equal (fn, build_pure_c_string ("pred"))))
+ pass &= treesit_predicate_pred (args, captures);
else
xsignal3 (Qtreesit_query_error,
build_pure_c_string ("Invalid predicate"),
fn, build_pure_c_string ("Currently Emacs only supports"
- " equal and match predicate"));
+ " equal, match, and pred"
+ " predicate"));
}
/* If all predicates passed, add captures to result list. */
return pass;
@@ -2637,8 +2696,59 @@ treesit_assume_true (bool val)
eassert (val == true);
}
+/* Tries to move CURSOR to point to TARGET. END_POS is the end of
+ TARGET. If success, return true, otherwise move CURSOR back to
+ starting position and return false. LIMIT is the recursion
+ limit. */
+static bool
+treesit_cursor_helper_1 (TSTreeCursor *cursor, TSNode *target,
+ uint32_t end_pos, ptrdiff_t limit)
+{
+ if (limit <= 0)
+ return false;
+
+ TSNode cursor_node = ts_tree_cursor_current_node (cursor);
+ if (ts_node_eq (cursor_node, *target))
+ return true;
+
+ if (!ts_tree_cursor_goto_first_child (cursor))
+ return false;
+
+ /* Skip nodes that definitely don't contain TARGET. */
+ while (ts_node_end_byte (cursor_node) < end_pos)
+ {
+ if (!ts_tree_cursor_goto_next_sibling (cursor))
+ break;
+ cursor_node = ts_tree_cursor_current_node (cursor);
+ }
+
+ /* Go through each sibling that could contain TARGET. Because of
+ missing nodes (their width is 0), there could be multiple
+ siblings that could contain TARGET. */
+ while (ts_node_start_byte (cursor_node) <= end_pos)
+ {
+ if (treesit_cursor_helper_1 (cursor, target, end_pos, limit - 1))
+ return true;
+
+ if (!ts_tree_cursor_goto_next_sibling (cursor))
+ break;
+ cursor_node = ts_tree_cursor_current_node (cursor);
+ }
+
+ /* Couldn't find TARGET, must be not in this subtree, move cursor
+ back and pray that other brothers and sisters can succeed. */
+ treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
+ return false;
+}
+
/* Create a TSTreeCursor pointing at NODE. PARSER is the lisp parser
- that produced NODE.
+ that produced NODE. If success, return true, otherwise return
+ false. This function should almost always succeed, but if the parse
+ tree is strangely too deep and exceeds the recursion limit, this
+ function will fail and return false.
+
+ If this function returns true, caller needs to free CURSOR; if
+ returns false, caller don't need to free CURSOR.
The reason we need this instead of simply using ts_tree_cursor_new
is that we have to create the cursor on the root node and traverse
@@ -2646,56 +2756,17 @@ treesit_assume_true (bool val)
Otherwise going to sibling or parent of NODE wouldn't work.
(Wow perfect filling.) */
-static TSTreeCursor
-treesit_cursor_helper (TSNode node, Lisp_Object parser)
+static bool
+treesit_cursor_helper (TSTreeCursor *cursor, TSNode node, Lisp_Object parser)
{
uint32_t end_pos = ts_node_end_byte (node);
TSNode root = ts_tree_root_node (XTS_PARSER (parser)->tree);
- TSTreeCursor cursor = ts_tree_cursor_new (root);
- TSNode cursor_node = ts_tree_cursor_current_node (&cursor);
- /* This is like treesit-node-at. We go down from the root node,
- either to first child or next sibling, repeatedly, and finally
- arrive at NODE. */
- while (!ts_node_eq (node, cursor_node))
- {
- treesit_assume_true (ts_tree_cursor_goto_first_child (&cursor));
- cursor_node = ts_tree_cursor_current_node (&cursor);
- /* ts_tree_cursor_goto_first_child_for_byte is not reliable, so
- we just go through each sibling. */
- while (ts_node_is_missing (cursor_node)
- || ts_node_end_byte (cursor_node) < end_pos)
- {
- /* A "missing" node has zero width, so it's possible that
- its end = NODE.end but it's not NODE, so we skip them.
- But we need to make sure this missing node is not the
- node we are looking for before skipping it. */
- if (ts_node_is_missing (cursor_node)
- && ts_node_eq (node, cursor_node))
- return cursor;
- treesit_assume_true (ts_tree_cursor_goto_next_sibling (&cursor));
- cursor_node = ts_tree_cursor_current_node (&cursor);
- }
- /* Right now CURSOR.end >= NODE.end. But what if CURSOR.end =
- NODE.end, and there are missing nodes after CURSOR, and the
- missing node after CURSOR is the NODE we are looking for??
- Well, create a probe and look ahead. (This is tested by
- treesit-cursor-helper-with-missing-node.) */
- TSTreeCursor probe = ts_tree_cursor_copy (&cursor);
- TSNode probe_node;
- while (ts_tree_cursor_goto_next_sibling (&probe))
- {
- probe_node = ts_tree_cursor_current_node (&probe);
- if (!ts_node_is_missing (probe_node))
- break;
- if (ts_node_eq (probe_node, node))
- {
- ts_tree_cursor_delete (&cursor);
- return probe;
- }
- }
- ts_tree_cursor_delete (&probe);
- }
- return cursor;
+ *cursor = ts_tree_cursor_new (root);
+ bool success = treesit_cursor_helper_1 (cursor, &node, end_pos,
+ treesit_recursion_limit);
+ if (!success)
+ ts_tree_cursor_delete (cursor);
+ return success;
}
/* Move CURSOR to the next/previous sibling. FORWARD controls the
@@ -2957,7 +3028,7 @@ Return the first matched node, or nil if none matches. */)
/* We use a default limit of 1000. See bug#59426 for the
discussion. */
- ptrdiff_t the_limit = 1000;
+ ptrdiff_t the_limit = treesit_recursion_limit;
if (!NILP (limit))
{
CHECK_FIXNUM (limit);
@@ -2968,7 +3039,10 @@ Return the first matched node, or nil if none matches. */)
Lisp_Object parser = XTS_NODE (node)->parser;
Lisp_Object return_value = Qnil;
- TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (node)->node, parser);
+ TSTreeCursor cursor;
+ if (!treesit_cursor_helper (&cursor, XTS_NODE (node)->node, parser))
+ return return_value;
+
if (treesit_search_dfs (&cursor, predicate, parser, NILP (backward),
NILP (all), the_limit, false))
{
@@ -3022,7 +3096,10 @@ always traverse leaf nodes first, then upwards. */)
Lisp_Object parser = XTS_NODE (start)->parser;
Lisp_Object return_value = Qnil;
- TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (start)->node, parser);
+ TSTreeCursor cursor;
+ if (!treesit_cursor_helper (&cursor, XTS_NODE (start)->node, parser))
+ return return_value;
+
if (treesit_search_forward (&cursor, predicate, parser,
NILP (backward), NILP (all)))
{
@@ -3130,7 +3207,7 @@ a regexp. */)
/* We use a default limit of 1000. See bug#59426 for the
discussion. */
- ptrdiff_t the_limit = 1000;
+ ptrdiff_t the_limit = treesit_recursion_limit;
if (!NILP (limit))
{
CHECK_FIXNUM (limit);
@@ -3141,7 +3218,10 @@ a regexp. */)
Lisp_Object parser = XTS_NODE (root)->parser;
Lisp_Object parent = Fcons (Qnil, Qnil);
- TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (root)->node, parser);
+ TSTreeCursor cursor;
+ if (!treesit_cursor_helper (&cursor, XTS_NODE (root)->node, parser))
+ return Qnil;
+
treesit_build_sparse_tree (&cursor, parent, predicate, process_fn,
the_limit, parser);
ts_tree_cursor_delete (&cursor);
@@ -3187,6 +3267,7 @@ syms_of_treesit (void)
DEFSYM (QCanchor, ":anchor");
DEFSYM (QCequal, ":equal");
DEFSYM (QCmatch, ":match");
+ DEFSYM (QCpred, ":pred");
DEFSYM (Qnot_found, "not-found");
DEFSYM (Qsymbol_error, "symbol-error");