1 files changed, 164 insertions, 83 deletions
diff --git a/src/treesit.c b/src/treesit.c
index 9fa88b48dcc..d168ff02b69 100644
--- a/src/treesit.c
+++ b/src/treesit.c
@@ -2,6 +2,8 @@
 
 Copyright (C) 2021-2022 Free Software Foundation, Inc.
 
+Maintainer: Yuan Fu <casouri@gmail.com>
+
 This file is part of GNU Emacs.
 
 GNU Emacs is free software: you can redistribute it and/or modify
@@ -404,6 +406,10 @@ init_treesit_functions (void)
 
 /*** Initialization */
 
+/* This is the limit on recursion levels for some tree-sitter
+   functions.  Remember to update docstrings when changing this
+   value. */
+const ptrdiff_t treesit_recursion_limit = 1000;
 bool treesit_initialized = false;
 
 static bool
@@ -927,11 +933,24 @@ static void
 treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree,
 				     Lisp_Object parser)
 {
-  uint32_t len;
-  TSRange *ranges = ts_tree_get_changed_ranges (old_tree, new_tree, &len);
+  /* If the old_tree is NULL, meaning this is the first parse, the
+     changed range is the whole buffer.  */
+  Lisp_Object lisp_ranges;
   struct buffer *buf = XBUFFER (XTS_PARSER (parser)->buffer);
-  Lisp_Object lisp_ranges = treesit_make_ranges (ranges, len, buf);
-  xfree (ranges);
+  if (old_tree)
+    {
+      uint32_t len;
+      TSRange *ranges = ts_tree_get_changed_ranges (old_tree, new_tree, &len);
+      lisp_ranges = treesit_make_ranges (ranges, len, buf);
+      xfree (ranges);
+    }
+  else
+    {
+      struct buffer *oldbuf = current_buffer;
+      set_buffer_internal (buf);
+      lisp_ranges = Fcons (Fcons (Fpoint_min (), Fpoint_max ()), Qnil);
+      set_buffer_internal (oldbuf);
+    }
 
   specpdl_ref count = SPECPDL_INDEX ();
 
@@ -949,6 +968,11 @@ treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree,
 static void
 treesit_ensure_parsed (Lisp_Object parser)
 {
+  /* Make sure this comes before everything else, see comment
+     (ref:notifier-inside-ensure-parsed) for more detail.  */
+  if (!XTS_PARSER (parser)->need_reparse)
+    return;
+
   struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
 
   /* Before we parse, catch up with the narrowing situation.  */
@@ -957,8 +981,6 @@ treesit_ensure_parsed (Lisp_Object parser)
      because it might set the flag to true.  */
   treesit_sync_visible_region (parser);
 
-  if (!XTS_PARSER (parser)->need_reparse)
-    return;
   TSParser *treesit_parser = XTS_PARSER (parser)->parser;
   TSTree *tree = XTS_PARSER (parser)->tree;
   TSInput input = XTS_PARSER (parser)->input;
@@ -978,14 +1000,17 @@ treesit_ensure_parsed (Lisp_Object parser)
       xsignal1 (Qtreesit_parse_error, buf);
     }
 
-  if (tree != NULL)
-    {
-      treesit_call_after_change_functions (tree, new_tree, parser);
-      ts_tree_delete (tree);
-    }
-
   XTS_PARSER (parser)->tree = new_tree;
   XTS_PARSER (parser)->need_reparse = false;
+
+  /* After-change functions should run at the very end, most crucially
+     after need_reparse is set to false, this way if the function
+     calls some tree-sitter function which invokes
+     treesit_ensure_parsed again, it returns early and do not
+     recursively call the after change functions again.
+     (ref:notifier-inside-ensure-parsed)  */
+  treesit_call_after_change_functions (tree, new_tree, parser);
+  ts_tree_delete (tree);
 }
 
 /* This is the read function provided to tree-sitter to read from a
@@ -1762,7 +1787,7 @@ If NODE is nil, return nil.  */)
   return build_string (string);
 }
 
-static TSTreeCursor treesit_cursor_helper (TSNode, Lisp_Object);
+static bool treesit_cursor_helper (TSTreeCursor *, TSNode, Lisp_Object);
 
 DEFUN ("treesit-node-parent",
        Ftreesit_node_parent, Streesit_node_parent, 1, 1, 0,
@@ -1778,7 +1803,10 @@ Return nil if NODE has no parent.  If NODE is nil, return nil.  */)
 
   TSNode treesit_node = XTS_NODE (node)->node;
   Lisp_Object parser = XTS_NODE (node)->parser;
-  TSTreeCursor cursor = treesit_cursor_helper (treesit_node, parser);
+  TSTreeCursor cursor;
+  if (!treesit_cursor_helper (&cursor, treesit_node, parser))
+    return return_value;
+
   if (ts_tree_cursor_goto_parent (&cursor))
   {
     TSNode parent = ts_tree_cursor_current_node (&cursor);
@@ -2042,12 +2070,11 @@ Note that this function returns an immediate child, not the smallest
 
   struct buffer *buf = XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
   ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
-  ptrdiff_t byte_pos = buf_charpos_to_bytepos (buf, XFIXNUM (pos));
 
   treesit_check_position (pos, buf);
-
   treesit_initialize ();
 
+  ptrdiff_t byte_pos = buf_charpos_to_bytepos (buf, XFIXNUM (pos));
   TSNode treesit_node = XTS_NODE (node)->node;
   TSNode child;
   if (NILP (named))
@@ -2078,14 +2105,14 @@ If NODE is nil, return nil.  */)
 
   struct buffer *buf = XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
   ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
-  ptrdiff_t byte_beg = buf_charpos_to_bytepos (buf, XFIXNUM (beg));
-  ptrdiff_t byte_end = buf_charpos_to_bytepos (buf, XFIXNUM (end));
 
   treesit_check_position (beg, buf);
   treesit_check_position (end, buf);
 
   treesit_initialize ();
 
+  ptrdiff_t byte_beg = buf_charpos_to_bytepos (buf, XFIXNUM (beg));
+  ptrdiff_t byte_end = buf_charpos_to_bytepos (buf, XFIXNUM (end));
   TSNode treesit_node = XTS_NODE (node)->node;
   TSNode child;
   if (NILP (named))
@@ -2161,6 +2188,8 @@ See Info node `(elisp)Pattern Matching' for detailed explanation.  */)
     return build_pure_c_string ("#equal");
   if (EQ (pattern, QCmatch))
     return build_pure_c_string ("#match");
+  if (EQ (pattern, QCpred))
+    return build_pure_c_string ("#pred");
   Lisp_Object opening_delimeter
     = build_pure_c_string (VECTORP (pattern) ? "[" : "(");
   Lisp_Object closing_delimiter
@@ -2260,10 +2289,10 @@ treesit_predicates_for_pattern (TSQuery *query, uint32_t pattern_index)
   return Fnreverse (result);
 }
 
-/* Translate a capture NAME (symbol) to the text of the captured node.
+/* Translate a capture NAME (symbol) to a node.
    Signals treesit-query-error if such node is not captured.  */
 static Lisp_Object
-treesit_predicate_capture_name_to_text (Lisp_Object name,
+treesit_predicate_capture_name_to_node (Lisp_Object name,
 					struct capture_range captures)
 {
   Lisp_Object node = Qnil;
@@ -2283,6 +2312,16 @@ treesit_predicate_capture_name_to_text (Lisp_Object name,
 	      name, build_pure_c_string ("A predicate can only refer"
 					 " to captured nodes in the "
 					 "same pattern"));
+  return node;
+}
+
+/* Translate a capture NAME (symbol) to the text of the captured node.
+   Signals treesit-query-error if such node is not captured.  */
+static Lisp_Object
+treesit_predicate_capture_name_to_text (Lisp_Object name,
+					struct capture_range captures)
+{
+  Lisp_Object node = treesit_predicate_capture_name_to_node (name, captures);
 
   struct buffer *old_buffer = current_buffer;
   set_buffer_internal (XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer));
@@ -2356,13 +2395,30 @@ treesit_predicate_match (Lisp_Object args, struct capture_range captures)
     return false;
 }
 
-/* About predicates: I decide to hard-code predicates in C instead of
-   implementing an extensible system where predicates are translated
-   to Lisp functions, and new predicates can be added by extending a
-   list of functions, because I really couldn't imagine any useful
-   predicates besides equal and match.  If we later found out that
-   such system is indeed useful and necessary, it can be easily
-   added.  */
+/* Handles predicate (#pred FN ARG...).  Return true if FN returns
+   non-nil; return false otherwise.  The arity of FN must match the
+   number of ARGs  */
+static bool
+treesit_predicate_pred (Lisp_Object args, struct capture_range captures)
+{
+  if (XFIXNUM (Flength (args)) < 2)
+    xsignal2 (Qtreesit_query_error,
+	      build_pure_c_string ("Predicate `pred' requires "
+				   "at least two arguments, "
+				   "but was only given"),
+	      Flength (args));
+
+  Lisp_Object fn = Fintern (XCAR (args), Qnil);
+  Lisp_Object nodes = Qnil;
+  Lisp_Object tail = XCDR (args);
+  FOR_EACH_TAIL (tail)
+    nodes = Fcons (treesit_predicate_capture_name_to_node (XCAR (tail),
+							   captures),
+		   nodes);
+  nodes = Fnreverse (nodes);
+
+  return !NILP (CALLN (Fapply, fn, nodes));
+}
 
 /* If all predicates in PREDICATES passes, return true; otherwise
    return false.  */
@@ -2378,14 +2434,17 @@ treesit_eval_predicates (struct capture_range captures, Lisp_Object predicates)
       Lisp_Object fn = XCAR (predicate);
       Lisp_Object args = XCDR (predicate);
       if (!NILP (Fstring_equal (fn, build_pure_c_string ("equal"))))
-	pass = treesit_predicate_equal (args, captures);
+	pass &= treesit_predicate_equal (args, captures);
       else if (!NILP (Fstring_equal (fn, build_pure_c_string ("match"))))
-	pass = treesit_predicate_match (args, captures);
+	pass &= treesit_predicate_match (args, captures);
+      else if (!NILP (Fstring_equal (fn, build_pure_c_string ("pred"))))
+	pass &= treesit_predicate_pred (args, captures);
       else
 	xsignal3 (Qtreesit_query_error,
 		  build_pure_c_string ("Invalid predicate"),
 		  fn, build_pure_c_string ("Currently Emacs only supports"
-					   " equal and match predicate"));
+					   " equal, match, and pred"
+					   " predicate"));
     }
   /* If all predicates passed, add captures to result list.  */
   return pass;
@@ -2637,8 +2696,59 @@ treesit_assume_true (bool val)
   eassert (val == true);
 }
 
+/* Tries to move CURSOR to point to TARGET.  END_POS is the end of
+   TARGET.  If success, return true, otherwise move CURSOR back to
+   starting position and return false.  LIMIT is the recursion
+   limit.  */
+static bool
+treesit_cursor_helper_1 (TSTreeCursor *cursor, TSNode *target,
+			 uint32_t end_pos, ptrdiff_t limit)
+{
+  if (limit <= 0)
+    return false;
+
+  TSNode cursor_node = ts_tree_cursor_current_node (cursor);
+  if (ts_node_eq (cursor_node, *target))
+    return true;
+
+  if (!ts_tree_cursor_goto_first_child (cursor))
+    return false;
+
+  /* Skip nodes that definitely don't contain TARGET.  */
+  while (ts_node_end_byte (cursor_node) < end_pos)
+    {
+      if (!ts_tree_cursor_goto_next_sibling (cursor))
+	break;
+      cursor_node = ts_tree_cursor_current_node (cursor);
+    }
+
+  /* Go through each sibling that could contain TARGET.  Because of
+     missing nodes (their width is 0), there could be multiple
+     siblings that could contain TARGET.  */
+  while (ts_node_start_byte (cursor_node) <= end_pos)
+    {
+      if (treesit_cursor_helper_1 (cursor, target, end_pos, limit - 1))
+	return true;
+
+      if (!ts_tree_cursor_goto_next_sibling (cursor))
+	break;
+      cursor_node = ts_tree_cursor_current_node (cursor);
+    }
+
+  /* Couldn't find TARGET, must be not in this subtree, move cursor
+     back and pray that other brothers and sisters can succeed.  */
+  treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
+  return false;
+}
+
 /* Create a TSTreeCursor pointing at NODE.  PARSER is the lisp parser
-   that produced NODE.
+   that produced NODE.  If success, return true, otherwise return
+   false.  This function should almost always succeed, but if the parse
+   tree is strangely too deep and exceeds the recursion limit, this
+   function will fail and return false.
+
+   If this function returns true, caller needs to free CURSOR; if
+   returns false, caller don't need to free CURSOR.
 
    The reason we need this instead of simply using ts_tree_cursor_new
    is that we have to create the cursor on the root node and traverse
@@ -2646,56 +2756,17 @@ treesit_assume_true (bool val)
    Otherwise going to sibling or parent of NODE wouldn't work.
 
    (Wow perfect filling.)  */
-static TSTreeCursor
-treesit_cursor_helper (TSNode node, Lisp_Object parser)
+static bool
+treesit_cursor_helper (TSTreeCursor *cursor, TSNode node, Lisp_Object parser)
 {
   uint32_t end_pos = ts_node_end_byte (node);
   TSNode root = ts_tree_root_node (XTS_PARSER (parser)->tree);
-  TSTreeCursor cursor = ts_tree_cursor_new (root);
-  TSNode cursor_node = ts_tree_cursor_current_node (&cursor);
-  /* This is like treesit-node-at.  We go down from the root node,
-     either to first child or next sibling, repeatedly, and finally
-     arrive at NODE.  */
-  while (!ts_node_eq (node, cursor_node))
-    {
-      treesit_assume_true (ts_tree_cursor_goto_first_child (&cursor));
-      cursor_node = ts_tree_cursor_current_node (&cursor);
-      /* ts_tree_cursor_goto_first_child_for_byte is not reliable, so
-	 we just go through each sibling.  */
-      while (ts_node_is_missing (cursor_node)
-	     || ts_node_end_byte (cursor_node) < end_pos)
-	{
-	  /* A "missing" node has zero width, so it's possible that
-	     its end = NODE.end but it's not NODE, so we skip them.
-	     But we need to make sure this missing node is not the
-	     node we are looking for before skipping it.  */
-	  if (ts_node_is_missing (cursor_node)
-	      && ts_node_eq (node, cursor_node))
-	    return cursor;
-	  treesit_assume_true (ts_tree_cursor_goto_next_sibling (&cursor));
-	  cursor_node = ts_tree_cursor_current_node (&cursor);
-	}
-      /* Right now CURSOR.end >= NODE.end.  But what if CURSOR.end =
-	 NODE.end, and there are missing nodes after CURSOR, and the
-	 missing node after CURSOR is the NODE we are looking for??
-	 Well, create a probe and look ahead.  (This is tested by
-	 treesit-cursor-helper-with-missing-node.)  */
-      TSTreeCursor probe = ts_tree_cursor_copy (&cursor);
-      TSNode probe_node;
-      while (ts_tree_cursor_goto_next_sibling (&probe))
-	{
-	  probe_node = ts_tree_cursor_current_node (&probe);
-	  if (!ts_node_is_missing (probe_node))
-	    break;
-	  if (ts_node_eq (probe_node, node))
-	    {
-	      ts_tree_cursor_delete (&cursor);
-	      return probe;
-	    }
-	}
-      ts_tree_cursor_delete (&probe);
-    }
-  return cursor;
+  *cursor = ts_tree_cursor_new (root);
+  bool success = treesit_cursor_helper_1 (cursor, &node, end_pos,
+					  treesit_recursion_limit);
+  if (!success)
+    ts_tree_cursor_delete (cursor);
+  return success;
 }
 
 /* Move CURSOR to the next/previous sibling.  FORWARD controls the
@@ -2957,7 +3028,7 @@ Return the first matched node, or nil if none matches.  */)
 
   /* We use a default limit of 1000.  See bug#59426 for the
      discussion.  */
-  ptrdiff_t the_limit = 1000;
+  ptrdiff_t the_limit = treesit_recursion_limit;
   if (!NILP (limit))
     {
       CHECK_FIXNUM (limit);
@@ -2968,7 +3039,10 @@ Return the first matched node, or nil if none matches.  */)
 
   Lisp_Object parser = XTS_NODE (node)->parser;
   Lisp_Object return_value = Qnil;
-  TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (node)->node, parser);
+  TSTreeCursor cursor;
+  if (!treesit_cursor_helper (&cursor, XTS_NODE (node)->node, parser))
+    return return_value;
+
   if (treesit_search_dfs (&cursor, predicate, parser, NILP (backward),
 			  NILP (all), the_limit, false))
     {
@@ -3022,7 +3096,10 @@ always traverse leaf nodes first, then upwards.  */)
 
   Lisp_Object parser = XTS_NODE (start)->parser;
   Lisp_Object return_value = Qnil;
-  TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (start)->node, parser);
+  TSTreeCursor cursor;
+  if (!treesit_cursor_helper (&cursor, XTS_NODE (start)->node, parser))
+    return return_value;
+
   if (treesit_search_forward (&cursor, predicate, parser,
 			      NILP (backward), NILP (all)))
     {
@@ -3130,7 +3207,7 @@ a regexp.  */)
 
   /* We use a default limit of 1000.  See bug#59426 for the
      discussion.  */
-  ptrdiff_t the_limit = 1000;
+  ptrdiff_t the_limit = treesit_recursion_limit;
   if (!NILP (limit))
     {
       CHECK_FIXNUM (limit);
@@ -3141,7 +3218,10 @@ a regexp.  */)
 
   Lisp_Object parser = XTS_NODE (root)->parser;
   Lisp_Object parent = Fcons (Qnil, Qnil);
-  TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (root)->node, parser);
+  TSTreeCursor cursor;
+  if (!treesit_cursor_helper (&cursor, XTS_NODE (root)->node, parser))
+    return Qnil;
+
   treesit_build_sparse_tree (&cursor, parent, predicate, process_fn,
 			     the_limit, parser);
   ts_tree_cursor_delete (&cursor);
@@ -3187,6 +3267,7 @@ syms_of_treesit (void)
   DEFSYM (QCanchor, ":anchor");
   DEFSYM (QCequal, ":equal");
   DEFSYM (QCmatch, ":match");
+  DEFSYM (QCpred, ":pred");
 
   DEFSYM (Qnot_found, "not-found");
   DEFSYM (Qsymbol_error, "symbol-error");