diff options
Diffstat (limited to 'src/treesit.c')
-rw-r--r-- | src/treesit.c | 247 |
1 files changed, 164 insertions, 83 deletions
diff --git a/src/treesit.c b/src/treesit.c index 9fa88b48dcc..d168ff02b69 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -2,6 +2,8 @@ Copyright (C) 2021-2022 Free Software Foundation, Inc. +Maintainer: Yuan Fu <casouri@gmail.com> + This file is part of GNU Emacs. GNU Emacs is free software: you can redistribute it and/or modify @@ -404,6 +406,10 @@ init_treesit_functions (void) /*** Initialization */ +/* This is the limit on recursion levels for some tree-sitter + functions. Remember to update docstrings when changing this + value. */ +const ptrdiff_t treesit_recursion_limit = 1000; bool treesit_initialized = false; static bool @@ -927,11 +933,24 @@ static void treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree, Lisp_Object parser) { - uint32_t len; - TSRange *ranges = ts_tree_get_changed_ranges (old_tree, new_tree, &len); + /* If the old_tree is NULL, meaning this is the first parse, the + changed range is the whole buffer. */ + Lisp_Object lisp_ranges; struct buffer *buf = XBUFFER (XTS_PARSER (parser)->buffer); - Lisp_Object lisp_ranges = treesit_make_ranges (ranges, len, buf); - xfree (ranges); + if (old_tree) + { + uint32_t len; + TSRange *ranges = ts_tree_get_changed_ranges (old_tree, new_tree, &len); + lisp_ranges = treesit_make_ranges (ranges, len, buf); + xfree (ranges); + } + else + { + struct buffer *oldbuf = current_buffer; + set_buffer_internal (buf); + lisp_ranges = Fcons (Fcons (Fpoint_min (), Fpoint_max ()), Qnil); + set_buffer_internal (oldbuf); + } specpdl_ref count = SPECPDL_INDEX (); @@ -949,6 +968,11 @@ treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree, static void treesit_ensure_parsed (Lisp_Object parser) { + /* Make sure this comes before everything else, see comment + (ref:notifier-inside-ensure-parsed) for more detail. */ + if (!XTS_PARSER (parser)->need_reparse) + return; + struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); /* Before we parse, catch up with the narrowing situation. */ @@ -957,8 +981,6 @@ treesit_ensure_parsed (Lisp_Object parser) because it might set the flag to true. */ treesit_sync_visible_region (parser); - if (!XTS_PARSER (parser)->need_reparse) - return; TSParser *treesit_parser = XTS_PARSER (parser)->parser; TSTree *tree = XTS_PARSER (parser)->tree; TSInput input = XTS_PARSER (parser)->input; @@ -978,14 +1000,17 @@ treesit_ensure_parsed (Lisp_Object parser) xsignal1 (Qtreesit_parse_error, buf); } - if (tree != NULL) - { - treesit_call_after_change_functions (tree, new_tree, parser); - ts_tree_delete (tree); - } - XTS_PARSER (parser)->tree = new_tree; XTS_PARSER (parser)->need_reparse = false; + + /* After-change functions should run at the very end, most crucially + after need_reparse is set to false, this way if the function + calls some tree-sitter function which invokes + treesit_ensure_parsed again, it returns early and do not + recursively call the after change functions again. + (ref:notifier-inside-ensure-parsed) */ + treesit_call_after_change_functions (tree, new_tree, parser); + ts_tree_delete (tree); } /* This is the read function provided to tree-sitter to read from a @@ -1762,7 +1787,7 @@ If NODE is nil, return nil. */) return build_string (string); } -static TSTreeCursor treesit_cursor_helper (TSNode, Lisp_Object); +static bool treesit_cursor_helper (TSTreeCursor *, TSNode, Lisp_Object); DEFUN ("treesit-node-parent", Ftreesit_node_parent, Streesit_node_parent, 1, 1, 0, @@ -1778,7 +1803,10 @@ Return nil if NODE has no parent. If NODE is nil, return nil. */) TSNode treesit_node = XTS_NODE (node)->node; Lisp_Object parser = XTS_NODE (node)->parser; - TSTreeCursor cursor = treesit_cursor_helper (treesit_node, parser); + TSTreeCursor cursor; + if (!treesit_cursor_helper (&cursor, treesit_node, parser)) + return return_value; + if (ts_tree_cursor_goto_parent (&cursor)) { TSNode parent = ts_tree_cursor_current_node (&cursor); @@ -2042,12 +2070,11 @@ Note that this function returns an immediate child, not the smallest struct buffer *buf = XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer); ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg; - ptrdiff_t byte_pos = buf_charpos_to_bytepos (buf, XFIXNUM (pos)); treesit_check_position (pos, buf); - treesit_initialize (); + ptrdiff_t byte_pos = buf_charpos_to_bytepos (buf, XFIXNUM (pos)); TSNode treesit_node = XTS_NODE (node)->node; TSNode child; if (NILP (named)) @@ -2078,14 +2105,14 @@ If NODE is nil, return nil. */) struct buffer *buf = XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer); ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg; - ptrdiff_t byte_beg = buf_charpos_to_bytepos (buf, XFIXNUM (beg)); - ptrdiff_t byte_end = buf_charpos_to_bytepos (buf, XFIXNUM (end)); treesit_check_position (beg, buf); treesit_check_position (end, buf); treesit_initialize (); + ptrdiff_t byte_beg = buf_charpos_to_bytepos (buf, XFIXNUM (beg)); + ptrdiff_t byte_end = buf_charpos_to_bytepos (buf, XFIXNUM (end)); TSNode treesit_node = XTS_NODE (node)->node; TSNode child; if (NILP (named)) @@ -2161,6 +2188,8 @@ See Info node `(elisp)Pattern Matching' for detailed explanation. */) return build_pure_c_string ("#equal"); if (EQ (pattern, QCmatch)) return build_pure_c_string ("#match"); + if (EQ (pattern, QCpred)) + return build_pure_c_string ("#pred"); Lisp_Object opening_delimeter = build_pure_c_string (VECTORP (pattern) ? "[" : "("); Lisp_Object closing_delimiter @@ -2260,10 +2289,10 @@ treesit_predicates_for_pattern (TSQuery *query, uint32_t pattern_index) return Fnreverse (result); } -/* Translate a capture NAME (symbol) to the text of the captured node. +/* Translate a capture NAME (symbol) to a node. Signals treesit-query-error if such node is not captured. */ static Lisp_Object -treesit_predicate_capture_name_to_text (Lisp_Object name, +treesit_predicate_capture_name_to_node (Lisp_Object name, struct capture_range captures) { Lisp_Object node = Qnil; @@ -2283,6 +2312,16 @@ treesit_predicate_capture_name_to_text (Lisp_Object name, name, build_pure_c_string ("A predicate can only refer" " to captured nodes in the " "same pattern")); + return node; +} + +/* Translate a capture NAME (symbol) to the text of the captured node. + Signals treesit-query-error if such node is not captured. */ +static Lisp_Object +treesit_predicate_capture_name_to_text (Lisp_Object name, + struct capture_range captures) +{ + Lisp_Object node = treesit_predicate_capture_name_to_node (name, captures); struct buffer *old_buffer = current_buffer; set_buffer_internal (XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer)); @@ -2356,13 +2395,30 @@ treesit_predicate_match (Lisp_Object args, struct capture_range captures) return false; } -/* About predicates: I decide to hard-code predicates in C instead of - implementing an extensible system where predicates are translated - to Lisp functions, and new predicates can be added by extending a - list of functions, because I really couldn't imagine any useful - predicates besides equal and match. If we later found out that - such system is indeed useful and necessary, it can be easily - added. */ +/* Handles predicate (#pred FN ARG...). Return true if FN returns + non-nil; return false otherwise. The arity of FN must match the + number of ARGs */ +static bool +treesit_predicate_pred (Lisp_Object args, struct capture_range captures) +{ + if (XFIXNUM (Flength (args)) < 2) + xsignal2 (Qtreesit_query_error, + build_pure_c_string ("Predicate `pred' requires " + "at least two arguments, " + "but was only given"), + Flength (args)); + + Lisp_Object fn = Fintern (XCAR (args), Qnil); + Lisp_Object nodes = Qnil; + Lisp_Object tail = XCDR (args); + FOR_EACH_TAIL (tail) + nodes = Fcons (treesit_predicate_capture_name_to_node (XCAR (tail), + captures), + nodes); + nodes = Fnreverse (nodes); + + return !NILP (CALLN (Fapply, fn, nodes)); +} /* If all predicates in PREDICATES passes, return true; otherwise return false. */ @@ -2378,14 +2434,17 @@ treesit_eval_predicates (struct capture_range captures, Lisp_Object predicates) Lisp_Object fn = XCAR (predicate); Lisp_Object args = XCDR (predicate); if (!NILP (Fstring_equal (fn, build_pure_c_string ("equal")))) - pass = treesit_predicate_equal (args, captures); + pass &= treesit_predicate_equal (args, captures); else if (!NILP (Fstring_equal (fn, build_pure_c_string ("match")))) - pass = treesit_predicate_match (args, captures); + pass &= treesit_predicate_match (args, captures); + else if (!NILP (Fstring_equal (fn, build_pure_c_string ("pred")))) + pass &= treesit_predicate_pred (args, captures); else xsignal3 (Qtreesit_query_error, build_pure_c_string ("Invalid predicate"), fn, build_pure_c_string ("Currently Emacs only supports" - " equal and match predicate")); + " equal, match, and pred" + " predicate")); } /* If all predicates passed, add captures to result list. */ return pass; @@ -2637,8 +2696,59 @@ treesit_assume_true (bool val) eassert (val == true); } +/* Tries to move CURSOR to point to TARGET. END_POS is the end of + TARGET. If success, return true, otherwise move CURSOR back to + starting position and return false. LIMIT is the recursion + limit. */ +static bool +treesit_cursor_helper_1 (TSTreeCursor *cursor, TSNode *target, + uint32_t end_pos, ptrdiff_t limit) +{ + if (limit <= 0) + return false; + + TSNode cursor_node = ts_tree_cursor_current_node (cursor); + if (ts_node_eq (cursor_node, *target)) + return true; + + if (!ts_tree_cursor_goto_first_child (cursor)) + return false; + + /* Skip nodes that definitely don't contain TARGET. */ + while (ts_node_end_byte (cursor_node) < end_pos) + { + if (!ts_tree_cursor_goto_next_sibling (cursor)) + break; + cursor_node = ts_tree_cursor_current_node (cursor); + } + + /* Go through each sibling that could contain TARGET. Because of + missing nodes (their width is 0), there could be multiple + siblings that could contain TARGET. */ + while (ts_node_start_byte (cursor_node) <= end_pos) + { + if (treesit_cursor_helper_1 (cursor, target, end_pos, limit - 1)) + return true; + + if (!ts_tree_cursor_goto_next_sibling (cursor)) + break; + cursor_node = ts_tree_cursor_current_node (cursor); + } + + /* Couldn't find TARGET, must be not in this subtree, move cursor + back and pray that other brothers and sisters can succeed. */ + treesit_assume_true (ts_tree_cursor_goto_parent (cursor)); + return false; +} + /* Create a TSTreeCursor pointing at NODE. PARSER is the lisp parser - that produced NODE. + that produced NODE. If success, return true, otherwise return + false. This function should almost always succeed, but if the parse + tree is strangely too deep and exceeds the recursion limit, this + function will fail and return false. + + If this function returns true, caller needs to free CURSOR; if + returns false, caller don't need to free CURSOR. The reason we need this instead of simply using ts_tree_cursor_new is that we have to create the cursor on the root node and traverse @@ -2646,56 +2756,17 @@ treesit_assume_true (bool val) Otherwise going to sibling or parent of NODE wouldn't work. (Wow perfect filling.) */ -static TSTreeCursor -treesit_cursor_helper (TSNode node, Lisp_Object parser) +static bool +treesit_cursor_helper (TSTreeCursor *cursor, TSNode node, Lisp_Object parser) { uint32_t end_pos = ts_node_end_byte (node); TSNode root = ts_tree_root_node (XTS_PARSER (parser)->tree); - TSTreeCursor cursor = ts_tree_cursor_new (root); - TSNode cursor_node = ts_tree_cursor_current_node (&cursor); - /* This is like treesit-node-at. We go down from the root node, - either to first child or next sibling, repeatedly, and finally - arrive at NODE. */ - while (!ts_node_eq (node, cursor_node)) - { - treesit_assume_true (ts_tree_cursor_goto_first_child (&cursor)); - cursor_node = ts_tree_cursor_current_node (&cursor); - /* ts_tree_cursor_goto_first_child_for_byte is not reliable, so - we just go through each sibling. */ - while (ts_node_is_missing (cursor_node) - || ts_node_end_byte (cursor_node) < end_pos) - { - /* A "missing" node has zero width, so it's possible that - its end = NODE.end but it's not NODE, so we skip them. - But we need to make sure this missing node is not the - node we are looking for before skipping it. */ - if (ts_node_is_missing (cursor_node) - && ts_node_eq (node, cursor_node)) - return cursor; - treesit_assume_true (ts_tree_cursor_goto_next_sibling (&cursor)); - cursor_node = ts_tree_cursor_current_node (&cursor); - } - /* Right now CURSOR.end >= NODE.end. But what if CURSOR.end = - NODE.end, and there are missing nodes after CURSOR, and the - missing node after CURSOR is the NODE we are looking for?? - Well, create a probe and look ahead. (This is tested by - treesit-cursor-helper-with-missing-node.) */ - TSTreeCursor probe = ts_tree_cursor_copy (&cursor); - TSNode probe_node; - while (ts_tree_cursor_goto_next_sibling (&probe)) - { - probe_node = ts_tree_cursor_current_node (&probe); - if (!ts_node_is_missing (probe_node)) - break; - if (ts_node_eq (probe_node, node)) - { - ts_tree_cursor_delete (&cursor); - return probe; - } - } - ts_tree_cursor_delete (&probe); - } - return cursor; + *cursor = ts_tree_cursor_new (root); + bool success = treesit_cursor_helper_1 (cursor, &node, end_pos, + treesit_recursion_limit); + if (!success) + ts_tree_cursor_delete (cursor); + return success; } /* Move CURSOR to the next/previous sibling. FORWARD controls the @@ -2957,7 +3028,7 @@ Return the first matched node, or nil if none matches. */) /* We use a default limit of 1000. See bug#59426 for the discussion. */ - ptrdiff_t the_limit = 1000; + ptrdiff_t the_limit = treesit_recursion_limit; if (!NILP (limit)) { CHECK_FIXNUM (limit); @@ -2968,7 +3039,10 @@ Return the first matched node, or nil if none matches. */) Lisp_Object parser = XTS_NODE (node)->parser; Lisp_Object return_value = Qnil; - TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (node)->node, parser); + TSTreeCursor cursor; + if (!treesit_cursor_helper (&cursor, XTS_NODE (node)->node, parser)) + return return_value; + if (treesit_search_dfs (&cursor, predicate, parser, NILP (backward), NILP (all), the_limit, false)) { @@ -3022,7 +3096,10 @@ always traverse leaf nodes first, then upwards. */) Lisp_Object parser = XTS_NODE (start)->parser; Lisp_Object return_value = Qnil; - TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (start)->node, parser); + TSTreeCursor cursor; + if (!treesit_cursor_helper (&cursor, XTS_NODE (start)->node, parser)) + return return_value; + if (treesit_search_forward (&cursor, predicate, parser, NILP (backward), NILP (all))) { @@ -3130,7 +3207,7 @@ a regexp. */) /* We use a default limit of 1000. See bug#59426 for the discussion. */ - ptrdiff_t the_limit = 1000; + ptrdiff_t the_limit = treesit_recursion_limit; if (!NILP (limit)) { CHECK_FIXNUM (limit); @@ -3141,7 +3218,10 @@ a regexp. */) Lisp_Object parser = XTS_NODE (root)->parser; Lisp_Object parent = Fcons (Qnil, Qnil); - TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (root)->node, parser); + TSTreeCursor cursor; + if (!treesit_cursor_helper (&cursor, XTS_NODE (root)->node, parser)) + return Qnil; + treesit_build_sparse_tree (&cursor, parent, predicate, process_fn, the_limit, parser); ts_tree_cursor_delete (&cursor); @@ -3187,6 +3267,7 @@ syms_of_treesit (void) DEFSYM (QCanchor, ":anchor"); DEFSYM (QCequal, ":equal"); DEFSYM (QCmatch, ":match"); + DEFSYM (QCpred, ":pred"); DEFSYM (Qnot_found, "not-found"); DEFSYM (Qsymbol_error, "symbol-error"); |