diff options
Diffstat (limited to 'lisp/progmodes/c-ts-mode.el')
-rw-r--r-- | lisp/progmodes/c-ts-mode.el | 435 |
1 files changed, 284 insertions, 151 deletions
diff --git a/lisp/progmodes/c-ts-mode.el b/lisp/progmodes/c-ts-mode.el index cf941236f82..50b951888ae 100644 --- a/lisp/progmodes/c-ts-mode.el +++ b/lisp/progmodes/c-ts-mode.el @@ -39,6 +39,8 @@ (declare-function treesit-node-child-by-field-name "treesit.c") (declare-function treesit-node-type "treesit.c") +;;; Custom variables + (defcustom c-ts-mode-indent-offset 2 "Number of spaces for each indentation step in `c-ts-mode'." :version "29.1" @@ -61,6 +63,8 @@ follows the form of `treesit-simple-indent-rules'." (function :tag "A function for user customized style" ignore)) :group 'c) +;;; Syntax table + (defvar c-ts-mode--syntax-table (let ((table (make-syntax-table))) ;; Taken from the cc-langs version @@ -83,13 +87,29 @@ follows the form of `treesit-simple-indent-rules'." table) "Syntax table for `c-ts-mode'.") -(defvar c++-ts-mode--syntax-table - (let ((table (make-syntax-table c-ts-mode--syntax-table))) - ;; Template delimiters. - (modify-syntax-entry ?< "(" table) - (modify-syntax-entry ?> ")" table) - table) - "Syntax table for `c++-ts-mode'.") +(defun c-ts-mode--syntax-propertize (beg end) + "Apply syntax text property to template delimiters between BEG and END. + +< and > are usually punctuation, e.g., in ->. But when used for +templates, they should be considered pairs. + +This function checks for < and > in the changed RANGES and apply +appropriate text property to alter the syntax of template +delimiters < and >'s." + (goto-char beg) + (while (re-search-forward (rx (or "<" ">")) end t) + (pcase (treesit-node-type + (treesit-node-parent + (treesit-node-at (match-beginning 0)))) + ("template_argument_list" + (put-text-property (match-beginning 0) + (match-end 0) + 'syntax-table + (pcase (char-before) + (?< '(4 . ?>)) + (?> '(5 . ?<)))))))) + +;;; Indent (defun c-ts-mode--indent-styles (mode) "Indent rules supported by `c-ts-mode'. @@ -98,11 +118,13 @@ MODE is either `c' or `cpp'." `(((parent-is "translation_unit") parent-bol 0) ((node-is ")") parent 1) ((node-is "]") parent-bol 0) - ((node-is "}") (and parent parent-bol) 0) + ((node-is "}") c-ts-mode--bracket-children-anchor 0) ((node-is "else") parent-bol 0) ((node-is "case") parent-bol 0) ((node-is "preproc_arg") no-indent) - ((and (parent-is "comment") comment-end) comment-start -1) + ((and (parent-is "comment") c-ts-mode--looking-at-star) + c-ts-mode--comment-start-after-first-star -1) + ((parent-is "comment") prev-adaptive-prefix 0) ((node-is "labeled_statement") parent-bol 0) ((parent-is "labeled_statement") parent-bol c-ts-mode-indent-offset) ((match "preproc_ifdef" "compound_statement") point-min 0) @@ -111,7 +133,8 @@ MODE is either `c' or `cpp'." ((match "#endif" "preproc_if") point-min 0) ((match "preproc_function_def" "compound_statement") point-min 0) ((match "preproc_call" "compound_statement") point-min 0) - ((parent-is "compound_statement") (and parent parent-bol) c-ts-mode-indent-offset) + ((parent-is "compound_statement") + c-ts-mode--bracket-children-anchor c-ts-mode-indent-offset) ((parent-is "function_definition") parent-bol 0) ((parent-is "conditional_expression") first-sibling 0) ((parent-is "assignment_expression") parent-bol c-ts-mode-indent-offset) @@ -167,6 +190,39 @@ MODE is either `c' or `cpp'." ('linux (alist-get 'linux (c-ts-mode--indent-styles mode))))))) `((,mode ,@style)))) +(defun c-ts-mode--bracket-children-anchor (_n parent &rest _) + "This anchor is used for children of a compound_statement. +So anything inside a {} block. PARENT should be the +compound_statement. This anchor looks at the {, if itson its own +line, anchor at it, if it has stuff before it, anchor at the +beginning of grandparent." + (save-excursion + (goto-char (treesit-node-start parent)) + (let ((bol (line-beginning-position))) + (skip-chars-backward " \t") + (treesit-node-start + (if (< bol (point)) + (treesit-node-parent parent) + parent))))) + +(defun c-ts-mode--looking-at-star (&rest _) + "A tree-sitter simple indent matcher. +Matches if there is a \"*\" after point (ignoring whitespace in +between)." + (looking-at (rx (* (syntax whitespace)) "*"))) + +(defun c-ts-mode--comment-start-after-first-star (_n parent &rest _) + "A tree-sitter simple indent anchor. +Finds the \"/*\" and returns the point after the \"*\". +Assumes PARENT is a comment node." + (save-excursion + (goto-char (treesit-node-start parent)) + (if (looking-at (rx "/*")) + (match-end 0) + (point)))) + +;;; Font-lock + (defvar c-ts-mode--preproc-keywords '("#define" "#if" "#ifdef" "#ifndef" "#else" "#elif" "#endif" "#include") @@ -361,28 +417,34 @@ MODE is either `c' or `cpp'." @c-ts-mode--fontify-defun) (:match "^DEFUN$" @fn))))) -(defun c-ts-mode--fontify-declarator (node override start end &rest args) - "Fontify a declarator (whatever under the \"declarator\" field). -For NODE, OVERRIDE, START, END, and ARGS, see -`treesit-font-lock-rules'." +;;; Font-lock helpers + +(defun c-ts-mode--declarator-identifier (node) + "Return the identifier of the declarator node NODE." (pcase (treesit-node-type node) + ;; Recurse. ((or "attributed_declarator" "parenthesized_declarator") - (apply #'c-ts-mode--fontify-declarator - (treesit-node-child node 0 t) override start end args)) + (c-ts-mode--declarator-identifier (treesit-node-child node 0 t))) ("pointer_declarator" - (apply #'c-ts-mode--fontify-declarator - (treesit-node-child node -1) override start end args)) + (c-ts-mode--declarator-identifier (treesit-node-child node -1))) ((or "function_declarator" "array_declarator" "init_declarator") - (apply #'c-ts-mode--fontify-declarator - (treesit-node-child-by-field-name node "declarator") - override start end args)) + (c-ts-mode--declarator-identifier + (treesit-node-child-by-field-name node "declarator"))) + ;; Terminal case. ((or "identifier" "field_identifier") - (treesit-fontify-with-override - (treesit-node-start node) (treesit-node-end node) - (pcase (treesit-node-type (treesit-node-parent node)) - ("function_declarator" 'font-lock-function-name-face) - (_ 'font-lock-variable-name-face)) - override start end)))) + node))) + +(defun c-ts-mode--fontify-declarator (node override start end &rest _args) + "Fontify a declarator (whatever under the \"declarator\" field). +For NODE, OVERRIDE, START, END, and ARGS, see +`treesit-font-lock-rules'." + (let* ((identifier (c-ts-mode--declarator-identifier node)) + (face (pcase (treesit-node-type (treesit-node-parent identifier)) + ("function_declarator" 'font-lock-function-name-face) + (_ 'font-lock-variable-name-face)))) + (treesit-fontify-with-override + (treesit-node-start identifier) (treesit-node-end identifier) + face override start end))) (defun c-ts-mode--fontify-variable (node override start end &rest _) "Fontify an identifier node if it is a variable. @@ -453,94 +515,48 @@ For NODE, OVERRIDE, START, and END, see (t 'font-lock-warning-face)) override start end))) -(defun c-ts-mode--imenu-1 (node) - "Helper for `c-ts-mode--imenu'. -Find string representation for NODE and set marker, then recurse -the subtrees." - (let* ((ts-node (car node)) - (subtrees (mapcan #'c-ts-mode--imenu-1 (cdr node))) - (name (when ts-node - (treesit-node-text - (pcase (treesit-node-type ts-node) - ("function_definition" - (treesit-node-child-by-field-name - (treesit-node-child-by-field-name - ts-node "declarator") - "declarator")) - ("declaration" - (let ((child (treesit-node-child ts-node -1 t))) - (pcase (treesit-node-type child) - ("identifier" child) - (_ (treesit-node-child-by-field-name - child "declarator"))))) - ("struct_specifier" - (treesit-node-child-by-field-name - ts-node "name")))))) - (marker (when ts-node - (set-marker (make-marker) - (treesit-node-start ts-node))))) - (cond - ;; A struct_specifier could be inside a parameter list, another - ;; struct definition, a variable declaration, a function - ;; declaration. In those cases we don't include it. - ((string-match-p - (rx (or "parameter_declaration" "field_declaration" - "declaration" "function_definition")) - (or (treesit-node-type (treesit-node-parent ts-node)) - "")) - nil) - ;; Ignore function local variable declarations. - ((and (equal (treesit-node-type ts-node) "declaration") - (not (equal (treesit-node-type (treesit-node-parent ts-node)) - "translation_unit"))) - nil) - ((or (null ts-node) (null name)) subtrees) - (subtrees - `((,name ,(cons name marker) ,@subtrees))) - (t - `((,name . ,marker)))))) - -(defun c-ts-mode--imenu () - "Return Imenu alist for the current buffer." - (let* ((node (treesit-buffer-root-node)) - (func-tree (treesit-induce-sparse-tree - node "^function_definition$" nil 1000)) - (var-tree (treesit-induce-sparse-tree - node "^declaration$" nil 1000)) - (struct-tree (treesit-induce-sparse-tree - node "^struct_specifier$" nil 1000)) - (func-index (c-ts-mode--imenu-1 func-tree)) - (var-index (c-ts-mode--imenu-1 var-tree)) - (struct-index (c-ts-mode--imenu-1 struct-tree))) - (append - (when struct-index `(("Struct" . ,struct-index))) - (when var-index `(("Variable" . ,var-index))) - (when func-index `(("Function" . ,func-index)))))) - -(defun c-ts-mode--end-of-defun () - "`end-of-defun-function' of `c-ts-mode'." - ;; A struct/enum/union_specifier node doesn't include the ; at the - ;; end, so we manually skip it. - (treesit-end-of-defun) - (when (looking-at (rx (* " ") ";")) - (goto-char (match-end 0)) - ;; This part is copied from `end-of-defun'. - (unless (bolp) - (skip-chars-forward " \t") - (if (looking-at "\\s<\\|\n") - (forward-line 1))))) +;;; Imenu + +(defun c-ts-mode--defun-name (node) + "Return the name of the defun NODE. +Return nil if NODE is not a defun node or doesn't have a name." + (treesit-node-text + (pcase (treesit-node-type node) + ((or "function_definition" "declaration") + (c-ts-mode--declarator-identifier + (treesit-node-child-by-field-name node "declarator"))) + ((or "struct_specifier" "enum_specifier" + "union_specifier" "class_specifier") + (treesit-node-child-by-field-name node "name"))) + t)) + +;;; Defun navigation (defun c-ts-mode--defun-valid-p (node) - (if (string-match-p - (rx (or "struct_specifier" - "enum_specifier" - "union_specifier")) - (treesit-node-type node)) - (null - (treesit-node-top-level - node (rx (or "function_definition" - "type_definition")))) - t)) + "Return non-nil if NODE is a valid defun node. +Ie, NODE is not nested." + (not (or (and (member (treesit-node-type node) + '("struct_specifier" + "enum_specifier" + "union_specifier" + "declaration")) + ;; If NODE's type is one of the above, make sure it is + ;; top-level. + (treesit-node-top-level + node (rx (or "function_definition" + "type_definition" + "struct_specifier" + "enum_specifier" + "union_specifier" + "declaration")))) + + (and (equal (treesit-node-type node) "declaration") + ;; If NODE is a declaration, make sure it is not a + ;; function declaration. + (equal (treesit-node-type + (treesit-node-child-by-field-name + node "declarator")) + "function_declarator"))))) (defun c-ts-mode--defun-skipper () "Custom defun skipper for `c-ts-mode' and friends. @@ -556,15 +572,144 @@ the semicolon. This function skips the semicolon." `treesit-defun-type-regexp' defines what constructs to indent." (interactive "*") - (let ((orig-point (point-marker))) - ;; If `treesit-beginning-of-defun' returns nil, we are not in a - ;; defun, so don't indent anything. - (when (treesit-beginning-of-defun) - (let ((start (point))) - (treesit-end-of-defun) - (indent-region start (point)))) + (when-let ((orig-point (point-marker)) + (node (treesit-defun-at-point))) + (indent-region (treesit-node-start node) + (treesit-node-end node)) (goto-char orig-point))) +;;; Filling + +(defun c-ts-mode--fill-paragraph (&optional arg) + "Fillling function for `c-ts-mode'. +ARG is passed to `fill-paragraph'." + (interactive "*P") + (save-restriction + (widen) + (let* ((node (treesit-node-at (point))) + (start (treesit-node-start node)) + (end (treesit-node-end node)) + ;; Bind to nil to avoid infinite recursion. + (fill-paragraph-function nil) + (orig-point (point-marker)) + (start-marker nil) + (end-marker nil) + (end-len 0)) + (when (equal (treesit-node-type node) "comment") + ;; We mask "/*" and the space before "*/" like + ;; `c-fill-paragraph' does. + (atomic-change-group + ;; Mask "/*". + (goto-char start) + (when (looking-at (rx (* (syntax whitespace)) + (group "/") "*")) + (goto-char (match-beginning 1)) + (setq start-marker (point-marker)) + (replace-match " " nil nil nil 1)) + ;; Include whitespaces before /*. + (goto-char start) + (beginning-of-line) + (setq start (point)) + ;; Mask spaces before "*/" if it is attached at the end + ;; of a sentence rather than on its own line. + (goto-char end) + (when (looking-back (rx (not (syntax whitespace)) + (group (+ (syntax whitespace))) + "*/") + (line-beginning-position)) + (goto-char (match-beginning 1)) + (setq end-marker (point-marker)) + (setq end-len (- (match-end 1) (match-beginning 1))) + (replace-match (make-string end-len ?x) + nil nil nil 1)) + ;; If "*/" is on its own line, don't included it in the + ;; filling region. + (when (not end-marker) + (goto-char end) + (when (looking-back (rx "*/") 2) + (backward-char 2) + (skip-syntax-backward "-") + (setq end (point)))) + ;; Let `fill-paragraph' do its thing. + (goto-char orig-point) + (narrow-to-region start end) + (funcall #'fill-paragraph arg) + ;; Unmask. + (when start-marker + (goto-char start-marker) + (delete-char 1) + (insert "/")) + (when end-marker + (goto-char end-marker) + (delete-region (point) (+ end-len (point))) + (insert (make-string end-len ?\s)))) + (goto-char orig-point)) + ;; Return t so `fill-paragraph' doesn't attempt to fill by + ;; itself. + t))) + +(defun c-ts-mode-comment-setup () + "Set up local variables for C-like comment. + +Set up: + - `comment-start' + - `comment-end' + - `comment-start-skip' + - `comment-end-skip' + - `adaptive-fill-mode' + - `adaptive-fill-first-line-regexp' + - `paragraph-start' + - `paragraph-separate' + - `fill-paragraph-function'" + (setq-local comment-start "// ") + (setq-local comment-end "") + (setq-local comment-start-skip (rx (or (seq "/" (+ "/")) + (seq "/" (+ "*"))) + (* (syntax whitespace)))) + (setq-local comment-end-skip + (rx (* (syntax whitespace)) + (group (or (syntax comment-end) + (seq (+ "*") "/"))))) + (setq-local adaptive-fill-mode t) + ;; This matches (1) empty spaces (the default), (2) "//", (3) "*", + ;; but do not match "/*", because we don't want to use "/*" as + ;; prefix when filling. (Actually, it doesn't matter, because + ;; `comment-start-skip' matches "/*" which will cause + ;; `fill-context-prefix' to use "/*" as a prefix for filling, that's + ;; why we mask the "/*" in `c-ts-mode--fill-paragraph'.) + (setq-local adaptive-fill-regexp + (concat (rx (* (syntax whitespace)) + (group (or (seq "/" (+ "/")) (* "*")))) + adaptive-fill-regexp)) + ;; Note the missing * comparing to `adaptive-fill-regexp'. The + ;; reason for its absence is a bit convoluted to explain. Suffice + ;; to say that without it, filling a single line paragraph that + ;; starts with /* doesn't insert * at the beginning of each + ;; following line, and filling a multi-line paragraph whose first + ;; two lines start with * does insert * at the beginning of each + ;; following line. If you know how does adaptive filling works, you + ;; know what I mean. + (setq-local adaptive-fill-first-line-regexp + (rx bos + (seq (* (syntax whitespace)) + (group (seq "/" (+ "/"))) + (* (syntax whitespace))) + eos)) + ;; Same as `adaptive-fill-regexp'. + (setq-local paragraph-start + (rx (or (seq (* (syntax whitespace)) + (group (or (seq "/" (+ "/")) (* "*"))) + (* (syntax whitespace)) + ;; Add this eol so that in + ;; `fill-context-prefix', `paragraph-start' + ;; doesn't match the prefix. + eol) + "\f"))) + (setq-local paragraph-separate paragraph-start) + (setq-local fill-paragraph-function #'c-ts-mode--fill-paragraph)) + +;;; Modes + (defvar-keymap c-ts-mode-map :doc "Keymap for the C language with tree-sitter" :parent prog-mode-map @@ -587,6 +732,7 @@ the semicolon. This function skips the semicolon." "class_specifier")) #'c-ts-mode--defun-valid-p)) (setq-local treesit-defun-skipper #'c-ts-mode--defun-skipper) + (setq-local treesit-defun-name-function #'c-ts-mode--defun-name) ;; Nodes like struct/enum/union_specifier can appear in ;; function_definitions, so we need to find the top-level node. @@ -596,13 +742,25 @@ the semicolon. This function skips the semicolon." (when (eq c-ts-mode-indent-style 'linux) (setq-local indent-tabs-mode t)) + ;; Comment + (c-ts-mode-comment-setup) + ;; Electric (setq-local electric-indent-chars (append "{}():;," electric-indent-chars)) ;; Imenu. - (setq-local imenu-create-index-function #'c-ts-mode--imenu) - (setq-local which-func-functions nil) + (setq-local treesit-simple-imenu-settings + (let ((pred #'c-ts-mode--defun-valid-p)) + `(("Struct" ,(rx bos (or "struct" "enum" "union") + "_specifier" eos) + ,pred nil) + ("Variable" ,(rx bos "declaration" eos) ,pred nil) + ("Function" "\\`function_definition\\'" ,pred nil) + ("Class" ,(rx bos (or "class_specifier" + "function_definition") + eos) + ,pred nil)))) (setq-local treesit-font-lock-feature-list '(( comment definition) @@ -623,13 +781,6 @@ the semicolon. This function skips the semicolon." ;; Comments. (setq-local comment-start "/* ") (setq-local comment-end " */") - (setq-local comment-start-skip (rx (or (seq "/" (+ "/")) - (seq "/" (+ "*"))) - (* (syntax whitespace)))) - (setq-local comment-end-skip - (rx (* (syntax whitespace)) - (group (or (syntax comment-end) - (seq (+ "*") "/"))))) (setq-local treesit-simple-indent-rules (c-ts-mode--set-indent-style 'c)) @@ -637,37 +788,23 @@ the semicolon. This function skips the semicolon." ;; Font-lock. (setq-local treesit-font-lock-settings (c-ts-mode--font-lock-settings 'c)) - (treesit-major-mode-setup) - - ;; Override default value of end-of-defun-function set by - ;; `treesit-major-mode-setup'. - (setq-local end-of-defun-function #'c-ts-mode--end-of-defun)) + (treesit-major-mode-setup)) ;;;###autoload (define-derived-mode c++-ts-mode c-ts-base-mode "C++" "Major mode for editing C++, powered by tree-sitter." :group 'c++ - :syntax-table c++-ts-mode--syntax-table (unless (treesit-ready-p 'cpp) (error "Tree-sitter for C++ isn't available")) - ;; Comments. - (setq-local comment-start "// ") - (setq-local comment-end "") - (setq-local comment-start-skip (rx (or (seq "/" (+ "/")) - (seq "/" (+ "*"))) - (* (syntax whitespace)))) - (setq-local comment-end-skip - (rx (* (syntax whitespace)) - (group (or (syntax comment-end) - (seq (+ "*") "/"))))) - (setq-local treesit-text-type-regexp (regexp-opt '("comment" "raw_string_literal"))) (treesit-parser-create 'cpp) + (setq-local syntax-propertize-function + #'c-ts-mode--syntax-propertize) (setq-local treesit-simple-indent-rules (c-ts-mode--set-indent-style 'cpp)) @@ -675,11 +812,7 @@ the semicolon. This function skips the semicolon." ;; Font-lock. (setq-local treesit-font-lock-settings (c-ts-mode--font-lock-settings 'cpp)) - (treesit-major-mode-setup) - - ;; Override default value of end-of-defun-function set by - ;; `treesit-major-mode-setup'. - (setq-local end-of-defun-function #'c-ts-mode--end-of-defun)) + (treesit-major-mode-setup)) (provide 'c-ts-mode) |