summaryrefslogtreecommitdiff
path: root/lisp/progmodes/c-ts-mode.el
diff options
context:
space:
mode:
Diffstat (limited to 'lisp/progmodes/c-ts-mode.el')
-rw-r--r--lisp/progmodes/c-ts-mode.el435
1 files changed, 284 insertions, 151 deletions
diff --git a/lisp/progmodes/c-ts-mode.el b/lisp/progmodes/c-ts-mode.el
index cf941236f82..50b951888ae 100644
--- a/lisp/progmodes/c-ts-mode.el
+++ b/lisp/progmodes/c-ts-mode.el
@@ -39,6 +39,8 @@
(declare-function treesit-node-child-by-field-name "treesit.c")
(declare-function treesit-node-type "treesit.c")
+;;; Custom variables
+
(defcustom c-ts-mode-indent-offset 2
"Number of spaces for each indentation step in `c-ts-mode'."
:version "29.1"
@@ -61,6 +63,8 @@ follows the form of `treesit-simple-indent-rules'."
(function :tag "A function for user customized style" ignore))
:group 'c)
+;;; Syntax table
+
(defvar c-ts-mode--syntax-table
(let ((table (make-syntax-table)))
;; Taken from the cc-langs version
@@ -83,13 +87,29 @@ follows the form of `treesit-simple-indent-rules'."
table)
"Syntax table for `c-ts-mode'.")
-(defvar c++-ts-mode--syntax-table
- (let ((table (make-syntax-table c-ts-mode--syntax-table)))
- ;; Template delimiters.
- (modify-syntax-entry ?< "(" table)
- (modify-syntax-entry ?> ")" table)
- table)
- "Syntax table for `c++-ts-mode'.")
+(defun c-ts-mode--syntax-propertize (beg end)
+ "Apply syntax text property to template delimiters between BEG and END.
+
+< and > are usually punctuation, e.g., in ->. But when used for
+templates, they should be considered pairs.
+
+This function checks for < and > in the changed RANGES and apply
+appropriate text property to alter the syntax of template
+delimiters < and >'s."
+ (goto-char beg)
+ (while (re-search-forward (rx (or "<" ">")) end t)
+ (pcase (treesit-node-type
+ (treesit-node-parent
+ (treesit-node-at (match-beginning 0))))
+ ("template_argument_list"
+ (put-text-property (match-beginning 0)
+ (match-end 0)
+ 'syntax-table
+ (pcase (char-before)
+ (?< '(4 . ?>))
+ (?> '(5 . ?<))))))))
+
+;;; Indent
(defun c-ts-mode--indent-styles (mode)
"Indent rules supported by `c-ts-mode'.
@@ -98,11 +118,13 @@ MODE is either `c' or `cpp'."
`(((parent-is "translation_unit") parent-bol 0)
((node-is ")") parent 1)
((node-is "]") parent-bol 0)
- ((node-is "}") (and parent parent-bol) 0)
+ ((node-is "}") c-ts-mode--bracket-children-anchor 0)
((node-is "else") parent-bol 0)
((node-is "case") parent-bol 0)
((node-is "preproc_arg") no-indent)
- ((and (parent-is "comment") comment-end) comment-start -1)
+ ((and (parent-is "comment") c-ts-mode--looking-at-star)
+ c-ts-mode--comment-start-after-first-star -1)
+ ((parent-is "comment") prev-adaptive-prefix 0)
((node-is "labeled_statement") parent-bol 0)
((parent-is "labeled_statement") parent-bol c-ts-mode-indent-offset)
((match "preproc_ifdef" "compound_statement") point-min 0)
@@ -111,7 +133,8 @@ MODE is either `c' or `cpp'."
((match "#endif" "preproc_if") point-min 0)
((match "preproc_function_def" "compound_statement") point-min 0)
((match "preproc_call" "compound_statement") point-min 0)
- ((parent-is "compound_statement") (and parent parent-bol) c-ts-mode-indent-offset)
+ ((parent-is "compound_statement")
+ c-ts-mode--bracket-children-anchor c-ts-mode-indent-offset)
((parent-is "function_definition") parent-bol 0)
((parent-is "conditional_expression") first-sibling 0)
((parent-is "assignment_expression") parent-bol c-ts-mode-indent-offset)
@@ -167,6 +190,39 @@ MODE is either `c' or `cpp'."
('linux (alist-get 'linux (c-ts-mode--indent-styles mode)))))))
`((,mode ,@style))))
+(defun c-ts-mode--bracket-children-anchor (_n parent &rest _)
+ "This anchor is used for children of a compound_statement.
+So anything inside a {} block. PARENT should be the
+compound_statement. This anchor looks at the {, if itson its own
+line, anchor at it, if it has stuff before it, anchor at the
+beginning of grandparent."
+ (save-excursion
+ (goto-char (treesit-node-start parent))
+ (let ((bol (line-beginning-position)))
+ (skip-chars-backward " \t")
+ (treesit-node-start
+ (if (< bol (point))
+ (treesit-node-parent parent)
+ parent)))))
+
+(defun c-ts-mode--looking-at-star (&rest _)
+ "A tree-sitter simple indent matcher.
+Matches if there is a \"*\" after point (ignoring whitespace in
+between)."
+ (looking-at (rx (* (syntax whitespace)) "*")))
+
+(defun c-ts-mode--comment-start-after-first-star (_n parent &rest _)
+ "A tree-sitter simple indent anchor.
+Finds the \"/*\" and returns the point after the \"*\".
+Assumes PARENT is a comment node."
+ (save-excursion
+ (goto-char (treesit-node-start parent))
+ (if (looking-at (rx "/*"))
+ (match-end 0)
+ (point))))
+
+;;; Font-lock
+
(defvar c-ts-mode--preproc-keywords
'("#define" "#if" "#ifdef" "#ifndef"
"#else" "#elif" "#endif" "#include")
@@ -361,28 +417,34 @@ MODE is either `c' or `cpp'."
@c-ts-mode--fontify-defun)
(:match "^DEFUN$" @fn)))))
-(defun c-ts-mode--fontify-declarator (node override start end &rest args)
- "Fontify a declarator (whatever under the \"declarator\" field).
-For NODE, OVERRIDE, START, END, and ARGS, see
-`treesit-font-lock-rules'."
+;;; Font-lock helpers
+
+(defun c-ts-mode--declarator-identifier (node)
+ "Return the identifier of the declarator node NODE."
(pcase (treesit-node-type node)
+ ;; Recurse.
((or "attributed_declarator" "parenthesized_declarator")
- (apply #'c-ts-mode--fontify-declarator
- (treesit-node-child node 0 t) override start end args))
+ (c-ts-mode--declarator-identifier (treesit-node-child node 0 t)))
("pointer_declarator"
- (apply #'c-ts-mode--fontify-declarator
- (treesit-node-child node -1) override start end args))
+ (c-ts-mode--declarator-identifier (treesit-node-child node -1)))
((or "function_declarator" "array_declarator" "init_declarator")
- (apply #'c-ts-mode--fontify-declarator
- (treesit-node-child-by-field-name node "declarator")
- override start end args))
+ (c-ts-mode--declarator-identifier
+ (treesit-node-child-by-field-name node "declarator")))
+ ;; Terminal case.
((or "identifier" "field_identifier")
- (treesit-fontify-with-override
- (treesit-node-start node) (treesit-node-end node)
- (pcase (treesit-node-type (treesit-node-parent node))
- ("function_declarator" 'font-lock-function-name-face)
- (_ 'font-lock-variable-name-face))
- override start end))))
+ node)))
+
+(defun c-ts-mode--fontify-declarator (node override start end &rest _args)
+ "Fontify a declarator (whatever under the \"declarator\" field).
+For NODE, OVERRIDE, START, END, and ARGS, see
+`treesit-font-lock-rules'."
+ (let* ((identifier (c-ts-mode--declarator-identifier node))
+ (face (pcase (treesit-node-type (treesit-node-parent identifier))
+ ("function_declarator" 'font-lock-function-name-face)
+ (_ 'font-lock-variable-name-face))))
+ (treesit-fontify-with-override
+ (treesit-node-start identifier) (treesit-node-end identifier)
+ face override start end)))
(defun c-ts-mode--fontify-variable (node override start end &rest _)
"Fontify an identifier node if it is a variable.
@@ -453,94 +515,48 @@ For NODE, OVERRIDE, START, and END, see
(t 'font-lock-warning-face))
override start end)))
-(defun c-ts-mode--imenu-1 (node)
- "Helper for `c-ts-mode--imenu'.
-Find string representation for NODE and set marker, then recurse
-the subtrees."
- (let* ((ts-node (car node))
- (subtrees (mapcan #'c-ts-mode--imenu-1 (cdr node)))
- (name (when ts-node
- (treesit-node-text
- (pcase (treesit-node-type ts-node)
- ("function_definition"
- (treesit-node-child-by-field-name
- (treesit-node-child-by-field-name
- ts-node "declarator")
- "declarator"))
- ("declaration"
- (let ((child (treesit-node-child ts-node -1 t)))
- (pcase (treesit-node-type child)
- ("identifier" child)
- (_ (treesit-node-child-by-field-name
- child "declarator")))))
- ("struct_specifier"
- (treesit-node-child-by-field-name
- ts-node "name"))))))
- (marker (when ts-node
- (set-marker (make-marker)
- (treesit-node-start ts-node)))))
- (cond
- ;; A struct_specifier could be inside a parameter list, another
- ;; struct definition, a variable declaration, a function
- ;; declaration. In those cases we don't include it.
- ((string-match-p
- (rx (or "parameter_declaration" "field_declaration"
- "declaration" "function_definition"))
- (or (treesit-node-type (treesit-node-parent ts-node))
- ""))
- nil)
- ;; Ignore function local variable declarations.
- ((and (equal (treesit-node-type ts-node) "declaration")
- (not (equal (treesit-node-type (treesit-node-parent ts-node))
- "translation_unit")))
- nil)
- ((or (null ts-node) (null name)) subtrees)
- (subtrees
- `((,name ,(cons name marker) ,@subtrees)))
- (t
- `((,name . ,marker))))))
-
-(defun c-ts-mode--imenu ()
- "Return Imenu alist for the current buffer."
- (let* ((node (treesit-buffer-root-node))
- (func-tree (treesit-induce-sparse-tree
- node "^function_definition$" nil 1000))
- (var-tree (treesit-induce-sparse-tree
- node "^declaration$" nil 1000))
- (struct-tree (treesit-induce-sparse-tree
- node "^struct_specifier$" nil 1000))
- (func-index (c-ts-mode--imenu-1 func-tree))
- (var-index (c-ts-mode--imenu-1 var-tree))
- (struct-index (c-ts-mode--imenu-1 struct-tree)))
- (append
- (when struct-index `(("Struct" . ,struct-index)))
- (when var-index `(("Variable" . ,var-index)))
- (when func-index `(("Function" . ,func-index))))))
-
-(defun c-ts-mode--end-of-defun ()
- "`end-of-defun-function' of `c-ts-mode'."
- ;; A struct/enum/union_specifier node doesn't include the ; at the
- ;; end, so we manually skip it.
- (treesit-end-of-defun)
- (when (looking-at (rx (* " ") ";"))
- (goto-char (match-end 0))
- ;; This part is copied from `end-of-defun'.
- (unless (bolp)
- (skip-chars-forward " \t")
- (if (looking-at "\\s<\\|\n")
- (forward-line 1)))))
+;;; Imenu
+
+(defun c-ts-mode--defun-name (node)
+ "Return the name of the defun NODE.
+Return nil if NODE is not a defun node or doesn't have a name."
+ (treesit-node-text
+ (pcase (treesit-node-type node)
+ ((or "function_definition" "declaration")
+ (c-ts-mode--declarator-identifier
+ (treesit-node-child-by-field-name node "declarator")))
+ ((or "struct_specifier" "enum_specifier"
+ "union_specifier" "class_specifier")
+ (treesit-node-child-by-field-name node "name")))
+ t))
+
+;;; Defun navigation
(defun c-ts-mode--defun-valid-p (node)
- (if (string-match-p
- (rx (or "struct_specifier"
- "enum_specifier"
- "union_specifier"))
- (treesit-node-type node))
- (null
- (treesit-node-top-level
- node (rx (or "function_definition"
- "type_definition"))))
- t))
+ "Return non-nil if NODE is a valid defun node.
+Ie, NODE is not nested."
+ (not (or (and (member (treesit-node-type node)
+ '("struct_specifier"
+ "enum_specifier"
+ "union_specifier"
+ "declaration"))
+ ;; If NODE's type is one of the above, make sure it is
+ ;; top-level.
+ (treesit-node-top-level
+ node (rx (or "function_definition"
+ "type_definition"
+ "struct_specifier"
+ "enum_specifier"
+ "union_specifier"
+ "declaration"))))
+
+ (and (equal (treesit-node-type node) "declaration")
+ ;; If NODE is a declaration, make sure it is not a
+ ;; function declaration.
+ (equal (treesit-node-type
+ (treesit-node-child-by-field-name
+ node "declarator"))
+ "function_declarator")))))
(defun c-ts-mode--defun-skipper ()
"Custom defun skipper for `c-ts-mode' and friends.
@@ -556,15 +572,144 @@ the semicolon. This function skips the semicolon."
`treesit-defun-type-regexp' defines what constructs to indent."
(interactive "*")
- (let ((orig-point (point-marker)))
- ;; If `treesit-beginning-of-defun' returns nil, we are not in a
- ;; defun, so don't indent anything.
- (when (treesit-beginning-of-defun)
- (let ((start (point)))
- (treesit-end-of-defun)
- (indent-region start (point))))
+ (when-let ((orig-point (point-marker))
+ (node (treesit-defun-at-point)))
+ (indent-region (treesit-node-start node)
+ (treesit-node-end node))
(goto-char orig-point)))
+;;; Filling
+
+(defun c-ts-mode--fill-paragraph (&optional arg)
+ "Fillling function for `c-ts-mode'.
+ARG is passed to `fill-paragraph'."
+ (interactive "*P")
+ (save-restriction
+ (widen)
+ (let* ((node (treesit-node-at (point)))
+ (start (treesit-node-start node))
+ (end (treesit-node-end node))
+ ;; Bind to nil to avoid infinite recursion.
+ (fill-paragraph-function nil)
+ (orig-point (point-marker))
+ (start-marker nil)
+ (end-marker nil)
+ (end-len 0))
+ (when (equal (treesit-node-type node) "comment")
+ ;; We mask "/*" and the space before "*/" like
+ ;; `c-fill-paragraph' does.
+ (atomic-change-group
+ ;; Mask "/*".
+ (goto-char start)
+ (when (looking-at (rx (* (syntax whitespace))
+ (group "/") "*"))
+ (goto-char (match-beginning 1))
+ (setq start-marker (point-marker))
+ (replace-match " " nil nil nil 1))
+ ;; Include whitespaces before /*.
+ (goto-char start)
+ (beginning-of-line)
+ (setq start (point))
+ ;; Mask spaces before "*/" if it is attached at the end
+ ;; of a sentence rather than on its own line.
+ (goto-char end)
+ (when (looking-back (rx (not (syntax whitespace))
+ (group (+ (syntax whitespace)))
+ "*/")
+ (line-beginning-position))
+ (goto-char (match-beginning 1))
+ (setq end-marker (point-marker))
+ (setq end-len (- (match-end 1) (match-beginning 1)))
+ (replace-match (make-string end-len ?x)
+ nil nil nil 1))
+ ;; If "*/" is on its own line, don't included it in the
+ ;; filling region.
+ (when (not end-marker)
+ (goto-char end)
+ (when (looking-back (rx "*/") 2)
+ (backward-char 2)
+ (skip-syntax-backward "-")
+ (setq end (point))))
+ ;; Let `fill-paragraph' do its thing.
+ (goto-char orig-point)
+ (narrow-to-region start end)
+ (funcall #'fill-paragraph arg)
+ ;; Unmask.
+ (when start-marker
+ (goto-char start-marker)
+ (delete-char 1)
+ (insert "/"))
+ (when end-marker
+ (goto-char end-marker)
+ (delete-region (point) (+ end-len (point)))
+ (insert (make-string end-len ?\s))))
+ (goto-char orig-point))
+ ;; Return t so `fill-paragraph' doesn't attempt to fill by
+ ;; itself.
+ t)))
+
+(defun c-ts-mode-comment-setup ()
+ "Set up local variables for C-like comment.
+
+Set up:
+ - `comment-start'
+ - `comment-end'
+ - `comment-start-skip'
+ - `comment-end-skip'
+ - `adaptive-fill-mode'
+ - `adaptive-fill-first-line-regexp'
+ - `paragraph-start'
+ - `paragraph-separate'
+ - `fill-paragraph-function'"
+ (setq-local comment-start "// ")
+ (setq-local comment-end "")
+ (setq-local comment-start-skip (rx (or (seq "/" (+ "/"))
+ (seq "/" (+ "*")))
+ (* (syntax whitespace))))
+ (setq-local comment-end-skip
+ (rx (* (syntax whitespace))
+ (group (or (syntax comment-end)
+ (seq (+ "*") "/")))))
+ (setq-local adaptive-fill-mode t)
+ ;; This matches (1) empty spaces (the default), (2) "//", (3) "*",
+ ;; but do not match "/*", because we don't want to use "/*" as
+ ;; prefix when filling. (Actually, it doesn't matter, because
+ ;; `comment-start-skip' matches "/*" which will cause
+ ;; `fill-context-prefix' to use "/*" as a prefix for filling, that's
+ ;; why we mask the "/*" in `c-ts-mode--fill-paragraph'.)
+ (setq-local adaptive-fill-regexp
+ (concat (rx (* (syntax whitespace))
+ (group (or (seq "/" (+ "/")) (* "*"))))
+ adaptive-fill-regexp))
+ ;; Note the missing * comparing to `adaptive-fill-regexp'. The
+ ;; reason for its absence is a bit convoluted to explain. Suffice
+ ;; to say that without it, filling a single line paragraph that
+ ;; starts with /* doesn't insert * at the beginning of each
+ ;; following line, and filling a multi-line paragraph whose first
+ ;; two lines start with * does insert * at the beginning of each
+ ;; following line. If you know how does adaptive filling works, you
+ ;; know what I mean.
+ (setq-local adaptive-fill-first-line-regexp
+ (rx bos
+ (seq (* (syntax whitespace))
+ (group (seq "/" (+ "/")))
+ (* (syntax whitespace)))
+ eos))
+ ;; Same as `adaptive-fill-regexp'.
+ (setq-local paragraph-start
+ (rx (or (seq (* (syntax whitespace))
+ (group (or (seq "/" (+ "/")) (* "*")))
+ (* (syntax whitespace))
+ ;; Add this eol so that in
+ ;; `fill-context-prefix', `paragraph-start'
+ ;; doesn't match the prefix.
+ eol)
+ "\f")))
+ (setq-local paragraph-separate paragraph-start)
+ (setq-local fill-paragraph-function #'c-ts-mode--fill-paragraph))
+
+;;; Modes
+
(defvar-keymap c-ts-mode-map
:doc "Keymap for the C language with tree-sitter"
:parent prog-mode-map
@@ -587,6 +732,7 @@ the semicolon. This function skips the semicolon."
"class_specifier"))
#'c-ts-mode--defun-valid-p))
(setq-local treesit-defun-skipper #'c-ts-mode--defun-skipper)
+ (setq-local treesit-defun-name-function #'c-ts-mode--defun-name)
;; Nodes like struct/enum/union_specifier can appear in
;; function_definitions, so we need to find the top-level node.
@@ -596,13 +742,25 @@ the semicolon. This function skips the semicolon."
(when (eq c-ts-mode-indent-style 'linux)
(setq-local indent-tabs-mode t))
+ ;; Comment
+ (c-ts-mode-comment-setup)
+
;; Electric
(setq-local electric-indent-chars
(append "{}():;," electric-indent-chars))
;; Imenu.
- (setq-local imenu-create-index-function #'c-ts-mode--imenu)
- (setq-local which-func-functions nil)
+ (setq-local treesit-simple-imenu-settings
+ (let ((pred #'c-ts-mode--defun-valid-p))
+ `(("Struct" ,(rx bos (or "struct" "enum" "union")
+ "_specifier" eos)
+ ,pred nil)
+ ("Variable" ,(rx bos "declaration" eos) ,pred nil)
+ ("Function" "\\`function_definition\\'" ,pred nil)
+ ("Class" ,(rx bos (or "class_specifier"
+ "function_definition")
+ eos)
+ ,pred nil))))
(setq-local treesit-font-lock-feature-list
'(( comment definition)
@@ -623,13 +781,6 @@ the semicolon. This function skips the semicolon."
;; Comments.
(setq-local comment-start "/* ")
(setq-local comment-end " */")
- (setq-local comment-start-skip (rx (or (seq "/" (+ "/"))
- (seq "/" (+ "*")))
- (* (syntax whitespace))))
- (setq-local comment-end-skip
- (rx (* (syntax whitespace))
- (group (or (syntax comment-end)
- (seq (+ "*") "/")))))
(setq-local treesit-simple-indent-rules
(c-ts-mode--set-indent-style 'c))
@@ -637,37 +788,23 @@ the semicolon. This function skips the semicolon."
;; Font-lock.
(setq-local treesit-font-lock-settings (c-ts-mode--font-lock-settings 'c))
- (treesit-major-mode-setup)
-
- ;; Override default value of end-of-defun-function set by
- ;; `treesit-major-mode-setup'.
- (setq-local end-of-defun-function #'c-ts-mode--end-of-defun))
+ (treesit-major-mode-setup))
;;;###autoload
(define-derived-mode c++-ts-mode c-ts-base-mode "C++"
"Major mode for editing C++, powered by tree-sitter."
:group 'c++
- :syntax-table c++-ts-mode--syntax-table
(unless (treesit-ready-p 'cpp)
(error "Tree-sitter for C++ isn't available"))
- ;; Comments.
- (setq-local comment-start "// ")
- (setq-local comment-end "")
- (setq-local comment-start-skip (rx (or (seq "/" (+ "/"))
- (seq "/" (+ "*")))
- (* (syntax whitespace))))
- (setq-local comment-end-skip
- (rx (* (syntax whitespace))
- (group (or (syntax comment-end)
- (seq (+ "*") "/")))))
-
(setq-local treesit-text-type-regexp
(regexp-opt '("comment"
"raw_string_literal")))
(treesit-parser-create 'cpp)
+ (setq-local syntax-propertize-function
+ #'c-ts-mode--syntax-propertize)
(setq-local treesit-simple-indent-rules
(c-ts-mode--set-indent-style 'cpp))
@@ -675,11 +812,7 @@ the semicolon. This function skips the semicolon."
;; Font-lock.
(setq-local treesit-font-lock-settings (c-ts-mode--font-lock-settings 'cpp))
- (treesit-major-mode-setup)
-
- ;; Override default value of end-of-defun-function set by
- ;; `treesit-major-mode-setup'.
- (setq-local end-of-defun-function #'c-ts-mode--end-of-defun))
+ (treesit-major-mode-setup))
(provide 'c-ts-mode)