diff --git a/CHANGELOG.md b/CHANGELOG.md index 4022bcb..aca43c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ - [#96](https://github.com/clojure-emacs/clojure-ts-mode/pull/96): Highlight function name properly in `extend-protocol` form. - [#96](https://github.com/clojure-emacs/clojure-ts-mode/pull/96): Add support for extend-protocol forms to `clojure-ts-add-arity` refactoring command. +- [#99](https://github.com/clojure-emacs/clojure-ts-mode/pull/99): Improve navigation by s-expression by switching to an experimental + Clojure grammar. +- [#99](https://github.com/clojure-emacs/clojure-ts-mode/pull/99): More consistent docstrings highlighting and `fill-paragraph` behavior. +- [#99](https://github.com/clojure-emacs/clojure-ts-mode/pull/99): Fix bug in `clojure-ts-align` when nested form has extra spaces. +- [#99](https://github.com/clojure-emacs/clojure-ts-mode/pull/99): Fix bug in `clojure-ts-unwind` when there is only one expression after + threading symbol. ## 0.4.0 (2025-05-15) diff --git a/README.md b/README.md index 8f73908..7bc21af 100644 --- a/README.md +++ b/README.md @@ -123,11 +123,15 @@ Once installed, evaluate `clojure-ts-mode.el` and you should be ready to go. > `clojure-ts-mode` install the required grammars automatically, so for most > people no manual actions will be required. -`clojure-ts-mode` makes use of two Tree-sitter grammars to work properly: +`clojure-ts-mode` makes use of the following Tree-sitter grammars: -- The Clojure grammar, mentioned earlier -- [markdown-inline](https://github.com/MDeiml/tree-sitter-markdown), which -will be used for docstrings if available and if `clojure-ts-use-markdown-inline` is enabled. +- The [experimental](https://github.com/sogaiu/tree-sitter-clojure/tree/unstable-20250526) version Clojure grammar. This version includes a few + improvements, which potentially will be promoted to a stable release (See [the + discussion](https://github.com/sogaiu/tree-sitter-clojure/issues/65)). This grammar is required for proper work of `clojure-ts-mode`. +- [markdown-inline](https://github.com/MDeiml/tree-sitter-markdown), which will be used for docstrings if available and if + `clojure-ts-use-markdown-inline` is enabled. +- [tree-sitter-regex](https://github.com/tree-sitter/tree-sitter-regex/releases/tag/v0.24.3), which will be used for regex literals if available and if + `clojure-ts-use-regex-parser` is not `nil`. If you have `git` and a C compiler (`cc`) available on your system's `PATH`, `clojure-ts-mode` will install the @@ -136,8 +140,14 @@ set to `t` (the default). If `clojure-ts-mode` fails to automatically install the grammar, you have the option to install it manually, Please, refer to the installation instructions of -each required grammar and make sure you're install the versions expected. (see -`clojure-ts-grammar-recipes` for details) +each required grammar and make sure you're install the versions expected (see +`clojure-ts-grammar-recipes` for details). + +If `clojure-ts-ensure-grammars` is enabled, `clojure-ts-mode` will try to upgrade +the Clojure grammar if it's outdated. This might happen, when you activate +`clojure-ts-mode` for the first time after package update. If grammar was +previously installed, you might need to restart Emacs, because it has to reload +the grammar binary. ### Upgrading Tree-sitter grammars diff --git a/clojure-ts-mode.el b/clojure-ts-mode.el index 8b6bca7..d7f4da3 100644 --- a/clojure-ts-mode.el +++ b/clojure-ts-mode.el @@ -306,7 +306,7 @@ Only intended for use at development time.") "Syntax table for `clojure-ts-mode'.") (defconst clojure-ts--builtin-dynamic-var-regexp - (eval-and-compile + (eval-when-compile (concat "^" (regexp-opt '("*1" "*2" "*3" "*agent*" @@ -323,7 +323,7 @@ Only intended for use at development time.") "$"))) (defconst clojure-ts--builtin-symbol-regexp - (eval-and-compile + (eval-when-compile (concat "^" (regexp-opt '("do" "if" "let*" "var" @@ -341,7 +341,7 @@ Only intended for use at development time.") "defmulti" "defn" "defn-" "defonce" "defprotocol" "defrecord" "defstruct" "deftype" "delay" "doall" "dorun" "doseq" "dosync" "dotimes" "doto" - "extend-protocol" "extend-type" + "extend-protocol" "extend-type" "extend" "for" "future" "gen-class" "gen-interface" "if-let" "if-not" "if-some" "import" "in-ns""io!" @@ -371,72 +371,71 @@ Only intended for use at development time.") "Return a regular expression that matches one of SYMBOLS exactly." (concat "^" (regexp-opt symbols) "$")) -(defvar clojure-ts-function-docstring-symbols - '("definline" - "defmulti" - "defmacro" - "defn" - "defn-" - "defprotocol" - "ns") +(defconst clojure-ts-function-docstring-symbols + (rx line-start + (or "definline" + "defmulti" + "defmacro" + "defn" + "defn-" + "defprotocol" + "ns") + line-end) "Symbols that accept an optional docstring as their second argument.") -(defvar clojure-ts-definition-docstring-symbols - '("def") +(defconst clojure-ts-definition-docstring-symbols + (rx line-start "def" line-end) "Symbols that accept an optional docstring as their second argument. Any symbols added here should only treat their second argument as a docstring if a third argument (the value) is provided. \"def\" is the only builtin Clojure symbol that behaves like this.") (defconst clojure-ts--variable-definition-symbol-regexp - (eval-and-compile - (rx line-start (or "def" "defonce") line-end)) + (rx line-start (or "def" "defonce") line-end) "A regular expression matching a symbol used to define a variable.") (defconst clojure-ts--typedef-symbol-regexp - (eval-and-compile - (rx line-start - (or "defprotocol" "defmulti" "deftype" "defrecord" - "definterface" "defmethod" "defstruct") - line-end)) + (rx line-start + (or "defprotocol" "defmulti" "deftype" "defrecord" + "definterface" "defmethod" "defstruct") + line-end) "A regular expression matching a symbol used to define a type.") (defconst clojure-ts--type-symbol-regexp - (eval-and-compile - (rx line-start - (or "deftype" "defrecord" - ;; While not reifying, helps with doc strings - "defprotocol" "definterface" - "reify" "proxy" "extend-type" "extend-protocol") - line-end)) + (rx line-start + (or "deftype" "defrecord" + ;; While not reifying, helps with doc strings + "defprotocol" "definterface" + "reify" "proxy" "extend-type" "extend-protocol") + line-end) "A regular expression matching a symbol used to define or instantiate a type.") (defconst clojure-ts--interface-def-symbol-regexp - (eval-and-compile - (rx line-start (or "defprotocol" "definterface") line-end)) + (rx line-start (or "defprotocol" "definterface") line-end) "A regular expression matching a symbol used to define an interface.") (defun clojure-ts--docstring-query (capture-symbol) "Return a query that captures docstrings with CAPTURE-SYMBOL." `(;; Captures docstrings in def - ((list_lit :anchor (meta_lit) :? + ((list_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* :anchor (sym_lit) @_def_symbol - :anchor (comment) :? - :anchor (sym_lit) ; variable name - :anchor (comment) :? - :anchor (str_lit) ,capture-symbol - :anchor (_)) ; the variable's value - (:match ,(clojure-ts-symbol-regexp clojure-ts-definition-docstring-symbols) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* + ;; Variable name + :anchor (sym_lit) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (str_lit (str_content) ,capture-symbol) @font-lock-doc-face + ;; The variable's value + :anchor (_)) + (:match ,clojure-ts-definition-docstring-symbols @_def_symbol)) ;; Captures docstrings in metadata of definitions - ((list_lit :anchor (sym_lit) @_def_symbol - :anchor (comment) :? - :anchor (sym_lit - (meta_lit - value: (map_lit - (kwd_lit) @_doc-keyword - :anchor - (str_lit) ,capture-symbol)))) + ((list_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (sym_lit) @_def_symbol + :anchor (comment) :* + :anchor (meta_lit + value: (map_lit + (kwd_lit) @_doc-keyword + :anchor (str_lit (str_content) ,capture-symbol) @font-lock-doc-face))) ;; We're only supporting this on a fixed set of defining symbols ;; Existing regexes don't encompass def and defn ;; Naming another regex is very cumbersome. @@ -448,22 +447,27 @@ if a third argument (the value) is provided. @_def_symbol) (:equal @_doc-keyword ":doc")) ;; Captures docstrings defn, defmacro, ns, and things like that - ((list_lit :anchor (meta_lit) :? + ((list_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* :anchor (sym_lit) @_def_symbol - :anchor (comment) :? - :anchor (sym_lit) ; function_name - :anchor (comment) :? - :anchor (str_lit) ,capture-symbol) - (:match ,(clojure-ts-symbol-regexp clojure-ts-function-docstring-symbols) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* + ;; Function_name + :anchor (sym_lit) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (str_lit (str_content) ,capture-symbol) @font-lock-doc-face) + (:match ,clojure-ts-function-docstring-symbols @_def_symbol)) ;; Captures docstrings in defprotcol, definterface - ((list_lit :anchor (sym_lit) @_def_symbol - (list_lit - :anchor (sym_lit) (vec_lit) :* - (str_lit) ,capture-symbol :anchor) + ((list_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (sym_lit) @_def_symbol + (list_lit :anchor (sym_lit) (vec_lit) :* + (str_lit (str_content) ,capture-symbol) @font-lock-doc-face) :*) (:match ,clojure-ts--interface-def-symbol-regexp @_def_symbol)))) +(defconst clojure-ts--match-docstring-query + (treesit-query-compile 'clojure (clojure-ts--docstring-query '@font-lock-doc-face)) + "Precompiled query that matches a Clojure docstring.") + (defun clojure-ts--treesit-range-settings (use-markdown-inline use-regex) "Return value for `treesit-range-settings' for `clojure-ts-mode'. @@ -476,16 +480,14 @@ When USE-REGEX is non-nil, include range settings for regex parser." (treesit-range-rules :embed 'markdown-inline :host 'clojure - :offset '(1 . -1) :local t (clojure-ts--docstring-query '@capture))) (when use-regex (treesit-range-rules :embed 'regex :host 'clojure - :offset '(2 . -1) :local t - '((regex_lit) @capture))))) + '((regex_content) @capture))))) (defun clojure-ts--font-lock-settings (markdown-available regex-available) "Return font lock settings suitable for use in `treesit-font-lock-settings'. @@ -531,19 +533,21 @@ literals with regex grammar." ;; `clojure.core'. :feature 'builtin :language 'clojure - `(((list_lit meta: _ :* :anchor (sym_lit !namespace name: (sym_name) @font-lock-keyword-face)) + `(((list_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (sym_lit !namespace name: (sym_name) @font-lock-keyword-face)) (:match ,clojure-ts--builtin-symbol-regexp @font-lock-keyword-face)) - ((list_lit meta: _ :* :anchor - (sym_lit namespace: ((sym_ns) @ns - (:equal "clojure.core" @ns)) - name: (sym_name) @font-lock-keyword-face)) + ((list_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (sym_lit namespace: ((sym_ns) @ns + (:equal "clojure.core" @ns)) + name: (sym_name) @font-lock-keyword-face)) (:match ,clojure-ts--builtin-symbol-regexp @font-lock-keyword-face)) - ((anon_fn_lit meta: _ :* :anchor (sym_lit !namespace name: (sym_name) @font-lock-keyword-face)) + ((anon_fn_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (sym_lit !namespace name: (sym_name) @font-lock-keyword-face)) (:match ,clojure-ts--builtin-symbol-regexp @font-lock-keyword-face)) - ((anon_fn_lit meta: _ :* :anchor - (sym_lit namespace: ((sym_ns) @ns - (:equal "clojure.core" @ns)) - name: (sym_name) @font-lock-keyword-face)) + ((anon_fn_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (sym_lit namespace: ((sym_ns) @ns + (:equal "clojure.core" @ns)) + name: (sym_name) @font-lock-keyword-face)) (:match ,clojure-ts--builtin-symbol-regexp @font-lock-keyword-face)) ((sym_name) @font-lock-builtin-face (:match ,clojure-ts--builtin-dynamic-var-regexp @font-lock-builtin-face))) @@ -565,8 +569,9 @@ literals with regex grammar." ;; No wonder the tree-sitter-clojure grammar only touches syntax, and not semantics :feature 'definition ;; defn and defn like macros :language 'clojure - `(((list_lit :anchor meta: _ :* + `(((list_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* :anchor (sym_lit (sym_name) @font-lock-keyword-face) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* :anchor (sym_lit (sym_name) @font-lock-function-name-face)) (:match ,(rx-to-string `(seq bol @@ -579,25 +584,27 @@ literals with regex grammar." "deftest" "deftest-" "defmacro" - "definline") + "definline" + "defonce") eol)) @font-lock-keyword-face)) ((anon_fn_lit marker: "#" @font-lock-property-face)) ;; Methods implementation ((list_lit - ((sym_lit name: (sym_name) @def) - ((:match ,(rx-to-string - `(seq bol - (or - "defrecord" - "definterface" - "deftype" - "defprotocol") - eol)) - @def))) - :anchor - (sym_lit (sym_name) @font-lock-type-face) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor ((sym_lit name: (sym_name) @def) + ((:match ,(rx-to-string + `(seq bol + (or + "defrecord" + "definterface" + "deftype" + "defprotocol") + eol)) + @def))) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* + :anchor (sym_lit (sym_name) @font-lock-type-face) (list_lit (sym_lit name: (sym_name) @font-lock-function-name-face)))) ((list_lit @@ -605,7 +612,8 @@ literals with regex grammar." ((:match ,(rx-to-string `(seq bol (or "reify" - "extend-protocol") + "extend-protocol" + "extend-type") eol)) @def))) (list_lit @@ -620,8 +628,9 @@ literals with regex grammar." :feature 'variable ;; def, defonce :language 'clojure - `(((list_lit :anchor meta: _ :* + `(((list_lit :anchor [(comment) (meta_lit) (old_meta_lit)] :* :anchor (sym_lit (sym_name) @font-lock-keyword-face) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* :anchor (sym_lit (sym_name) @font-lock-variable-name-face)) (:match ,clojure-ts--variable-definition-symbol-regexp @font-lock-keyword-face))) @@ -669,7 +678,7 @@ literals with regex grammar." (treesit-font-lock-rules :feature 'doc :language 'markdown-inline - :override t + :override 'prepend `([((image_description) @link) ((link_destination) @font-lock-constant-face) ((code_span) @font-lock-constant-face) @@ -744,6 +753,7 @@ literals with regex grammar." `((comment) @font-lock-comment-face (dis_expr marker: "#_" @font-lock-comment-delimiter-face + meta: (meta_lit) :* @font-lock-comment-face value: _ @font-lock-comment-face) (,(append '(list_lit :anchor (sym_lit) @font-lock-comment-delimiter-face) @@ -788,7 +798,8 @@ literals with regex grammar." (defun clojure-ts--metadata-node-p (node) "Return non-nil if NODE is a Clojure metadata node." - (string-equal "meta_lit" (treesit-node-type node))) + (or (string-equal "meta_lit" (treesit-node-type node)) + (string-equal "old_meta_lit" (treesit-node-type node)))) (defun clojure-ts--var-node-p (node) "Return non-nil if NODE is a var (eg. #\\'foo)." @@ -821,11 +832,16 @@ Skip the optional metadata node at pos 0 if present." n) t))) -(defun clojure-ts--node-with-metadata-parent (node) - "Return parent for NODE only if NODE has metadata, otherwise return nil." - (when-let* ((prev-sibling (treesit-node-prev-sibling node)) - ((clojure-ts--metadata-node-p prev-sibling))) - (treesit-node-parent (treesit-node-parent node)))) +(defun clojure-ts--first-value-child (node) + "Returns the first value child of the given NODE. + +In the syntax tree, there are a few types of possible child nodes: +unnamed standalone nodes (e.g., comments), anonymous nodes (e.g., +opening or closing parentheses), and named nodes. Named nodes are +standalone nodes that are labeled by a specific name. The most common +names are meta and value. This function skips any unnamed, anonymous, +and metadata nodes and returns the first value node." + (treesit-node-child-by-field-name node "value")) (defun clojure-ts--symbol-matches-p (symbol-regexp node) "Return non-nil if NODE is a symbol that matches SYMBOL-REGEXP." @@ -1043,6 +1059,7 @@ The possible values for this variable are ("try" . ((:block 0))) ("with-out-str" . ((:block 0))) ("defprotocol" . ((:block 1) (:inner 1))) + ("definterface" . ((:block 1) (:inner 1))) ("binding" . ((:block 1))) ("case" . ((:block 1))) ("cond->" . ((:block 1))) @@ -1299,31 +1316,31 @@ indentation rule in `clojure-ts--semantic-indent-rules-defaults' or according to the rule. If NODE is nil, use next node after BOL." (and (or (clojure-ts--list-node-p parent) (clojure-ts--anon-fn-node-p parent)) - (let* ((first-child (clojure-ts--node-child-skip-metadata parent 0))) + (let* ((first-child (clojure-ts--first-value-child parent))) (when-let* ((rule (clojure-ts--find-semantic-rule node parent 0))) - (and (not (clojure-ts--match-with-metadata node)) - (let ((rule-type (car rule)) - (rule-value (cadr rule))) - (if (equal rule-type :block) - (if (zerop rule-value) - ;; Special treatment for block 0 rule. - (clojure-ts--match-block-0-body bol first-child) - (clojure-ts--node-pos-match-block node parent bol rule-value)) - ;; Return true for any inner rule. - t))))))) + (let ((rule-type (car rule)) + (rule-value (cadr rule))) + (if (equal rule-type :block) + (if (zerop rule-value) + ;; Special treatment for block 0 rule. + (clojure-ts--match-block-0-body bol first-child) + (clojure-ts--node-pos-match-block node parent bol rule-value)) + ;; Return true for any inner rule. + t)))))) (defun clojure-ts--match-function-call-arg (node parent _bol) "Match NODE if PARENT is a list expressing a function or macro call." (and (or (clojure-ts--list-node-p parent) (clojure-ts--anon-fn-node-p parent)) - ;; Can the following two clauses be replaced by checking indexes? - ;; Does the second child exist, and is it not equal to the current node? - (clojure-ts--node-child-skip-metadata parent 1) - (not (treesit-node-eq (clojure-ts--node-child-skip-metadata parent 1) node)) - (let ((first-child (clojure-ts--node-child-skip-metadata parent 0))) - (or (clojure-ts--symbol-node-p first-child) - (clojure-ts--keyword-node-p first-child) - (clojure-ts--var-node-p first-child))))) + (let ((first-child (clojure-ts--first-value-child parent)) + (second-child (clojure-ts--node-child-skip-metadata parent 1))) + (and first-child + ;; Does the second child exist, and is it not equal to the current node? + second-child + (not (treesit-node-eq second-child node)) + (or (clojure-ts--symbol-node-p first-child) + (clojure-ts--keyword-node-p first-child) + (clojure-ts--var-node-p first-child)))))) (defvar clojure-ts--threading-macro (eval-and-compile @@ -1336,55 +1353,25 @@ according to the rule. If NODE is nil, use next node after BOL." ;; If not, then align function arg. (and (or (clojure-ts--list-node-p parent) (clojure-ts--anon-fn-node-p parent)) - (let ((first-child (treesit-node-child parent 0 t))) + (let ((first-child (clojure-ts--first-value-child parent))) (clojure-ts--symbol-matches-p clojure-ts--threading-macro first-child)))) -(defun clojure-ts--match-fn-docstring (node) - "Match NODE when it is a docstring for PARENT function definition node." - ;; A string that is the third node in a function defn block - (let ((parent (treesit-node-parent node))) - (and (treesit-node-eq node (treesit-node-child parent 2 t)) - (let ((first-auncle (treesit-node-child parent 0 t))) - (clojure-ts--symbol-matches-p - (regexp-opt clojure-ts-function-docstring-symbols) - first-auncle))))) - -(defun clojure-ts--match-def-docstring (node) - "Match NODE when it is a docstring for PARENT variable definition node." - ;; A string that is the fourth node in a variable definition block. - (let ((parent (treesit-node-parent node))) - (and (treesit-node-eq node (treesit-node-child parent 2 t)) - ;; There needs to be a value after the string. - ;; If there is no 4th child, then this string is the value. - (treesit-node-child parent 3 t) - (let ((first-auncle (treesit-node-child parent 0 t))) - (clojure-ts--symbol-matches-p - (regexp-opt clojure-ts-definition-docstring-symbols) - first-auncle))))) - -(defun clojure-ts--match-method-docstring (node) - "Match NODE when it is a docstring in a method definition." - (let* ((grandparent (treesit-node-parent ;; the protocol/interface - (treesit-node-parent node))) ;; the method definition - (first-grandauncle (treesit-node-child grandparent 0 t))) - (clojure-ts--symbol-matches-p - clojure-ts--interface-def-symbol-regexp - first-grandauncle))) - (defun clojure-ts--match-docstring (_node parent _bol) "Match PARENT when it is a docstring node." - (and (clojure-ts--string-node-p parent) ;; We are IN a string - (or (clojure-ts--match-def-docstring parent) - (clojure-ts--match-fn-docstring parent) - (clojure-ts--match-method-docstring parent)))) + (when-let* ((top-level-node (treesit-parent-until parent 'defun t)) + (result (treesit-query-capture top-level-node + clojure-ts--match-docstring-query))) + (seq-find (lambda (elt) + (and (eq (car elt) 'font-lock-doc-face) + (treesit-node-eq (cdr elt) parent))) + result))) (defun clojure-ts--match-with-metadata (node &optional _parent _bol) "Match NODE when it has metadata." - (let ((prev-sibling (treesit-node-prev-sibling node))) - (and prev-sibling - (clojure-ts--metadata-node-p prev-sibling)))) + (when-let* ((prev-sibling (treesit-node-prev-sibling node))) + (clojure-ts--metadata-node-p prev-sibling))) (defun clojure-ts--anchor-parent-opening-paren (_node parent _bol) "Return position of PARENT start for NODE. @@ -1398,21 +1385,10 @@ for forms with type hints." (treesit-search-subtree #'clojure-ts--opening-paren-node-p nil t 1) (treesit-node-start))) -(defun clojure-ts--match-collection-item-with-metadata (node-type) - "Return a matcher for a collection item with metadata by NODE-TYPE. - -The returned matcher accepts NODE, PARENT and BOL and returns true only -if NODE has metadata and its parent has type NODE-TYPE." - (lambda (node _parent _bol) - (string-equal node-type - (treesit-node-type - (clojure-ts--node-with-metadata-parent node))))) - (defun clojure-ts--anchor-nth-sibling (n) "Return the start of the Nth child of PARENT skipping metadata." (lambda (_n parent &rest _) - (treesit-node-start - (clojure-ts--node-child-skip-metadata parent n)))) + (treesit-node-start (treesit-node-child parent n t)))) (defun clojure-ts--semantic-indent-rules () "Return a list of indentation rules for `treesit-simple-indent-rules'. @@ -1425,19 +1401,6 @@ used." `((clojure ((parent-is "^source$") parent-bol 0) (clojure-ts--match-docstring parent 0) - ;; Collections items with metadata. - ;; - ;; This should be before `clojure-ts--match-with-metadata', otherwise they - ;; will never be matched. - (,(clojure-ts--match-collection-item-with-metadata "^vec_lit$") grand-parent 1) - (,(clojure-ts--match-collection-item-with-metadata "^map_lit$") grand-parent 1) - (,(clojure-ts--match-collection-item-with-metadata "^set_lit$") grand-parent 2) - ;; - ;; If we enable this rule for lists, it will break many things. - ;; (,(clojure-ts--match-collection-item-with-metadata "list_lit") grand-parent 1) - ;; - ;; All other forms with metadata. - (clojure-ts--match-with-metadata parent 0) ;; Literal Sequences ((parent-is "^vec_lit$") parent 1) ;; https://guide.clojure.style/#bindings-alignment ((parent-is "^map_lit$") parent 1) ;; https://guide.clojure.style/#map-keys-alignment @@ -1453,7 +1416,9 @@ used." ;; https://guide.clojure.style/#vertically-align-fn-args (clojure-ts--match-function-call-arg ,(clojure-ts--anchor-nth-sibling 1) 0) ;; https://guide.clojure.style/#one-space-indent - ((parent-is "^list_lit$") parent 1)))) + ((parent-is "^list_lit$") parent 1) + ((parent-is "^anon_fn_lit$") parent 2) + (clojure-ts--match-with-metadata parent 0)))) (defun clojure-ts--configured-indent-rules () "Gets the configured choice of indent rules." @@ -1496,7 +1461,7 @@ of the first symbol of a functional literal NODE." (when (or (clojure-ts--list-node-p node) (and include-anon-fn-lit (clojure-ts--anon-fn-node-p node))) - (when-let* ((first-child (clojure-ts--node-child-skip-metadata node 0)) + (when-let* ((first-child (clojure-ts--first-value-child node)) ((clojure-ts--symbol-node-p first-child))) (clojure-ts--named-node-text first-child)))) @@ -1536,7 +1501,15 @@ function literal." "definline" "defrecord" "defmacro" - "defmulti") + "defmulti" + "defonce" + "defprotocol" + "deftest" + "deftest-" + "ns" + "definterface" + "deftype" + "defstruct") eol))) (defconst clojure-ts--markdown-inline-sexp-nodes @@ -1545,10 +1518,22 @@ function literal." "code_span") "Nodes representing s-expressions in the `markdown-inline' parser.") +(defun clojure-ts--default-sexp-node-p (node) + "Return TRUE if point is after the # marker of set or function literal NODE." + (and (eq (char-before) ?\#) + (string-match-p (rx bol (or "anon_fn_lit" "set_lit") eol) + (treesit-node-type (treesit-node-parent node))))) + (defconst clojure-ts--thing-settings `((clojure (sexp ,(regexp-opt clojure-ts--sexp-nodes)) (list ,(regexp-opt clojure-ts--list-nodes)) + ;; `sexp-default' thing allows to fallback to the default implementation of + ;; `forward-sexp' function where `treesit-forward-sexp' produces undesired + ;; results. + (sexp-default + ;; For `C-M-f' in "#|(a)" or "#|{1 2 3}" + (,(rx (or "(" "{")) . ,#'clojure-ts--default-sexp-node-p)) (text ,(regexp-opt '("comment"))) (defun ,#'clojure-ts--defun-node-p)) (when clojure-ts-use-markdown-inline @@ -1598,11 +1583,11 @@ BOUND bounds the whitespace search." (point)) (when-let* ((cur-sexp (treesit-node-first-child-for-pos root-node (point) t))) (goto-char (treesit-node-start cur-sexp)) - (if (and (string= "sym_lit" (treesit-node-type cur-sexp)) - (clojure-ts--metadata-node-p (treesit-node-child cur-sexp 0 t)) - (and (not (treesit-node-child-by-field-name cur-sexp "value")) - (string-empty-p (clojure-ts--named-node-text cur-sexp)))) - (treesit-end-of-thing 'sexp 2 'restricted) + (if (clojure-ts--metadata-node-p cur-sexp) + (progn + (treesit-end-of-thing 'sexp 1 'restricted) + (just-one-space) + (treesit-end-of-thing 'sexp 1 'restricted)) (treesit-end-of-thing 'sexp 1 'restricted)) (when (looking-at-p ",") (forward-char)) @@ -1635,6 +1620,33 @@ BOUND bounds the whitespace search." sexp-end t))) +(defvar clojure-ts--align-query + (treesit-query-compile 'clojure + (append + `(((map_lit) @map) + ((ns_map_lit) @ns-map) + ((list_lit + ((sym_lit) @sym + (:match ,(clojure-ts-symbol-regexp clojure-ts-align-binding-forms) @sym)) + (vec_lit) @bindings-vec)) + ((list_lit + ((sym_lit) @sym + (:match ,(clojure-ts-symbol-regexp clojure-ts-align-cond-forms) @sym))) + @cond) + ((anon_fn_lit + ((sym_lit) @sym + (:match ,(clojure-ts-symbol-regexp clojure-ts-align-binding-forms) @sym)) + (vec_lit) @bindings-vec)) + ((anon_fn_lit + ((sym_lit) @sym + (:match ,(clojure-ts-symbol-regexp clojure-ts-align-cond-forms) @sym))) + @cond))))) + +(defvar clojure-ts--align-reader-conditionals-query + (treesit-query-compile 'clojure + '(((read_cond_lit) @read-cond) + ((splicing_read_cond_lit) @read-cond)))) + (defun clojure-ts--get-nodes-to-align (beg end) "Return a plist of nodes data for alignment. @@ -1649,31 +1661,15 @@ have changed." ;; By default `treesit-query-capture' captures all nodes that cross the range. ;; We need to restrict it to only nodes inside of the range. (let* ((region-node (clojure-ts--region-node beg end)) - (query (treesit-query-compile 'clojure - (append - `(((map_lit) @map) - ((ns_map_lit) @ns-map) - ((list_lit - ((sym_lit) @sym - (:match ,(clojure-ts-symbol-regexp clojure-ts-align-binding-forms) @sym)) - (vec_lit) @bindings-vec)) - ((list_lit - ((sym_lit) @sym - (:match ,(clojure-ts-symbol-regexp clojure-ts-align-cond-forms) @sym))) - @cond) - ((anon_fn_lit - ((sym_lit) @sym - (:match ,(clojure-ts-symbol-regexp clojure-ts-align-binding-forms) @sym)) - (vec_lit) @bindings-vec)) - ((anon_fn_lit - ((sym_lit) @sym - (:match ,(clojure-ts-symbol-regexp clojure-ts-align-cond-forms) @sym))) - @cond)) - (when clojure-ts-align-reader-conditionals - '(((read_cond_lit) @read-cond) - ((splicing_read_cond_lit) @read-cond))))))) - (thread-last (treesit-query-capture region-node query beg end) + (nodes (append (treesit-query-capture region-node clojure-ts--align-query beg end) + (when clojure-ts-align-reader-conditionals + (treesit-query-capture region-node clojure-ts--align-reader-conditionals-query beg end))))) + (thread-last nodes (seq-remove (lambda (elt) (eq (car elt) 'sym))) + ;; Reverse the result to align the most deeply nested nodes + ;; first. This way we can prevent breaking alignment of outer + ;; nodes. + (seq-reverse) ;; When first node is reindented, all other nodes become ;; outdated. Executing the entire query everytime is very ;; expensive, instead we use markers for every captured node to @@ -1913,6 +1909,7 @@ parenthesis." (delete-region beg (point)) ;; `raise-sexp' doesn't work properly for function literals (it loses one ;; of the parenthesis). Seems like an Emacs' bug. + (backward-up-list) (delete-pair)))) (defun clojure-ts--fix-sexp-whitespace () @@ -1952,19 +1949,25 @@ With universal argument \\[universal-argument], fully unwinds thread." (end (thread-first threading-sexp (treesit-node-end) (copy-marker)))) - (while (> n 0) - (cond - ((string-match-p (rx bol (* "some") "->" eol) sym) - (clojure-ts--unwind-thread-first)) - ((string-match-p (rx bol (* "some") "->>" eol) sym) - (clojure-ts--unwind-thread-last))) - (setq n (1- n)) - ;; After unwinding we check if it is the last expression and maybe - ;; splice it. - (when (clojure-ts--nothing-more-to-unwind) - (clojure-ts--pop-out-of-threading) - (clojure-ts--fix-sexp-whitespace) - (setq n 0))) + ;; If it's the last expression, just raise it out of the threading + ;; macro. + (if (clojure-ts--nothing-more-to-unwind) + (progn + (clojure-ts--pop-out-of-threading) + (clojure-ts--fix-sexp-whitespace)) + (while (> n 0) + (cond + ((string-match-p (rx bol (* "some") "->" eol) sym) + (clojure-ts--unwind-thread-first)) + ((string-match-p (rx bol (* "some") "->>" eol) sym) + (clojure-ts--unwind-thread-last))) + (setq n (1- n)) + ;; After unwinding we check if it is the last expression and maybe + ;; splice it. + (when (clojure-ts--nothing-more-to-unwind) + (clojure-ts--pop-out-of-threading) + (clojure-ts--fix-sexp-whitespace) + (setq n 0)))) (indent-region beg end) (delete-trailing-whitespace beg end))) (user-error "No threading form to unwind at point"))) @@ -2117,9 +2120,9 @@ type, etc. See `treesit-thing-settings' for more details." (defun clojure-ts--add-arity-internal (fn-node) "Add an arity to a function defined by FN-NODE." (let* ((first-coll (clojure-ts--node-child fn-node (rx bol (or "vec_lit" "list_lit") eol))) - (coll-start (clojure-ts--node-start-skip-metadata first-coll)) + (coll-start (treesit-node-start first-coll)) (line-parent (thread-first fn-node - (clojure-ts--node-child-skip-metadata 0) + (clojure-ts--first-value-child) (treesit-node-start) (line-number-at-pos))) (line-args (line-number-at-pos coll-start)) @@ -2138,7 +2141,7 @@ type, etc. See `treesit-thing-settings' for more details." (defun clojure-ts--add-arity-defprotocol-internal (fn-node) "Add an arity to a defprotocol function defined by FN-NODE." (let* ((args-vec (clojure-ts--node-child fn-node (rx bol "vec_lit" eol))) - (args-vec-start (clojure-ts--node-start-skip-metadata args-vec)) + (args-vec-start (treesit-node-start args-vec)) (line-parent (thread-first fn-node (clojure-ts--node-child-skip-metadata 0) (treesit-node-start) @@ -2158,7 +2161,7 @@ type, etc. See `treesit-thing-settings' for more details." (defun clojure-ts--add-arity-reify-internal (fn-node) "Add an arity to a reify function defined by FN-NODE." (let* ((fn-name (clojure-ts--list-node-sym-text fn-node))) - (goto-char (clojure-ts--node-start-skip-metadata fn-node)) + (goto-char (treesit-node-start fn-node)) (insert "(" fn-name " [])") (newline-and-indent) ;; Put the point between sqare brackets. @@ -2340,7 +2343,7 @@ before DELIM-OPEN." ("when" "when-not") ("when-not" "when")))) (save-excursion - (goto-char (clojure-ts--node-start-skip-metadata cond-node)) + (goto-char (treesit-node-start cond-node)) (down-list 1) (delete-char (length cond-sym)) (insert new-sym) @@ -2350,25 +2353,10 @@ before DELIM-OPEN." (indent-region beg end-marker))) (user-error "No conditional expression found"))) -(defun clojure-ts--point-outside-node-p (node) - "Return non-nil if point is outside of the actual NODE start. - -Clojure grammar treats metadata as part of an expression, so for example -^boolean (not (= 2 2)) is a single list node, including metadata. This -causes issues for functions that navigate by s-expressions and lists. -This function returns non-nil if point is outside of the outermost -parenthesis." - (let* ((actual-node-start (clojure-ts--node-start-skip-metadata node)) - (node-end (treesit-node-end node)) - (pos (point))) - (or (< pos actual-node-start) - (> pos node-end)))) - (defun clojure-ts-cycle-not () "Add or remove a not form around the current form." (interactive) - (if-let* ((list-node (clojure-ts--parent-until (rx bol "list_lit" eol))) - ((not (clojure-ts--point-outside-node-p list-node)))) + (if-let* ((list-node (clojure-ts--parent-until (rx bol "list_lit" eol)))) (let ((beg (treesit-node-start list-node)) (end-marker (copy-marker (treesit-node-end list-node))) (pos (copy-marker (point) t))) @@ -2475,7 +2463,7 @@ parenthesis." (defconst clojure-ts-grammar-recipes '((clojure "https://github.com/sogaiu/tree-sitter-clojure.git" - "v0.0.13") + "unstable-20250526") (markdown-inline "https://github.com/MDeiml/tree-sitter-markdown" "v0.4.1" "tree-sitter-markdown-inline/src") @@ -2483,12 +2471,25 @@ parenthesis." "v0.24.3")) "Intended to be used as the value for `treesit-language-source-alist'.") +(defun clojure-ts--clojure-grammar-outdated-p () + "Return TRUE if currently installed grammar is outdated. + +This function checks if `clojure-ts-mode' is compatible with the +currently installed grammar. The simplest way to do this is to validate +a query that is valid in a previous grammar version but invalid in the +required version." + (treesit-query-valid-p 'clojure '((sym_lit (meta_lit))))) + (defun clojure-ts--ensure-grammars () "Install required language grammars if not already available." (when clojure-ts-ensure-grammars (dolist (recipe clojure-ts-grammar-recipes) (let ((grammar (car recipe))) - (unless (treesit-language-available-p grammar nil) + (when (or (not (treesit-language-available-p grammar nil)) + ;; If Clojure grammar is available, but outdated, re-install + ;; it. + (and (equal grammar 'clojure) + (clojure-ts--clojure-grammar-outdated-p))) (message "Installing %s Tree-sitter grammar" grammar) ;; `treesit-language-source-alist' is dynamically scoped. ;; Binding it in this let expression allows @@ -2686,11 +2687,15 @@ Useful if you want to switch to the `clojure-mode's mode mappings." (treesit-query-compile 'clojure '(((source (list_lit + :anchor [(comment) (meta_lit) (old_meta_lit)] :* :anchor (sym_lit name: (sym_name) @ns) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* :anchor (sym_lit name: (sym_name) @ns-name))) (:equal @ns "ns")) ((source (list_lit + :anchor [(comment) (meta_lit) (old_meta_lit)] :* :anchor (sym_lit name: (sym_name) @in-ns) + :anchor [(comment) (meta_lit) (old_meta_lit)] :* :anchor (quoting_lit :anchor (sym_lit name: (sym_name) @ns-name)))) (:equal @in-ns "in-ns"))))) diff --git a/doc/design.md b/doc/design.md index 8afeaff..4c19319 100644 --- a/doc/design.md +++ b/doc/design.md @@ -32,29 +32,43 @@ In short: ## tree-sitter-clojure -Clojure-ts-mode uses the tree-sitter-clojure grammar, which can be found at -The clojure-ts-mode grammar provides very basic, low level nodes that try to match Clojure's very light syntax. +`clojure-ts-mode` uses the experimental version tree-sitter-clojure grammar, which +can be found at +. The +`clojure-ts-mode` grammar provides very basic, low level nodes that try to match +Clojure's very light syntax. There are nodes to represent: -- Symbols (sym_lit) - - Contain (sym_ns) and (sym_name) nodes -- Keywords (kwd_lit) - - Contain (kwd_ns) and (kw_name) nodes -- Strings (str_lit) -- Chars (char_lit) -- Nil (nil_lit) -- Booleans (bool_lit) -- Numbers (num_lit) -- Comments (comment, dis_expr) - - dis_expr is the `#_` discard expression -- Lists (list_list) -- Vectors (vec_lit) -- Maps (map_lit) -- Sets (set_lit) - -There are also nodes to represent metadata, which appear on `meta:` child fields of the nodes the metadata is defined on. -For example a simple vector with metadata defined on it like so +- Symbols `(sym_lit)` + - Contain `(sym_ns)` and `(sym_name)` nodes +- Keywords `(kwd_lit)` + - Contain `(kwd_ns)` and `(kw_name)` nodes +- Strings `(str_lit)` + - Contains `(str_content)` node +- Chars `(char_lit)` +- Nil `(nil_lit)` +- Booleans `(bool_lit)` +- Numbers `(num_lit)` +- Comments `(comment, dis_expr)` + - `dis_expr` is the `#_` discard expression +- Lists `(list_list)` +- Vectors `(vec_lit)` +- Maps `(map_lit)` +- Sets `(set_lit)` +- Metadata nodes `(meta_lit)` +- Regex content `(regex_content)` +- Function literals `(anon_fn_lit)` + +The best place to learn more about the tree-sitter-clojure grammar is to read +the [grammar.js file from the tree-sitter-clojure repository](https://github.com/sogaiu/tree-sitter-clojure/blob/master/grammar.js "grammar.js"). + +### Difference between stable grammar and experimental + +#### Standalone metadata nodes + +Metadata nodes in stable grammar appear as child nodes of the nodes the metadata +is defined on. For example a simple vector with metadata defined on it like so: ```clojure ^:has-metadata [1] @@ -69,7 +83,27 @@ will produce a parse tree like so value: (num_lit)) ``` -The best place to learn more about the tree-sitter-clojure grammar is to read the [grammar.js file from the tree-sitter-clojure repository](https://github.com/sogaiu/tree-sitter-clojure/blob/master/grammar.js "grammar.js"). +Although it's somewhat closer to hoe Clojure treats metadata itself, in the +context of a text editor it creates some problems, which were discussed [here](https://github.com/sogaiu/tree-sitter-clojure/issues/65). To +name a few: +- `forward-sexp` command would skip both, metadata and the node it's attached + to. Called from an opening paren it would signal an error "No more sexp to + move across". +- `kill-sexp` command would kill both, metadata and the node it's attached to. +- `backward-up-list` called from the inside of a list with metadata would move + point to the beginning of metadata node. +- Internally we had to introduce some workarounds to skip metadata nodes or + figure out where the actual node starts. + +#### Special nodes for string content and regex content + +To parse the content of certain strings with a separate grammar, it is necessary +to extract the string's content, excluding its opening and closing quotes. To +achieve this, Emacs 31 allows specifying offsets for `treesit-range-settings`. +However, in Emacs 30.1, this feature is broken due to bug [#77848](https://debbugs.gnu.org/cgi/bugreport.cgi?bug=77848) (a fix is +anticipated in Emacs 30.2). The presence of `str_content` and `regex_content` nodes +allows us to support this feature across all Emacs versions without relying on +offsets. ### Clojure Syntax, not Clojure Semantics diff --git a/test/clojure-ts-mode-refactor-threading-test.el b/test/clojure-ts-mode-refactor-threading-test.el index ce26d5d..35e1ebb 100644 --- a/test/clojure-ts-mode-refactor-threading-test.el +++ b/test/clojure-ts-mode-refactor-threading-test.el @@ -205,6 +205,13 @@ (clojure-ts-unwind) (clojure-ts-unwind)) + (when-refactoring-it "should work correctly when there is only one expression" + "(->> (filter even? [1 2 3 4]))" + + "(filter even? [1 2 3 4])" + + (clojure-ts-unwind)) + (when-refactoring-it "should unwind N steps with numeric prefix arg" "(->> [1 2 3 4 5] (filter even?) diff --git a/test/samples/indentation.clj b/test/samples/indentation.clj index 132a5f2..52b417e 100644 --- a/test/samples/indentation.clj +++ b/test/samples/indentation.clj @@ -228,15 +228,24 @@ :foo "bar"} -;; NOTE: List elements with metadata are not indented correctly. '(one two ^:foo - three) + three) ^{:nextjournal.clerk/visibility {:code :hide}} (defn actual [args]) +(println "Hello" + "World") + +#(println + "hello" + %) + +#(println "hello" + %) + (def ^:private hello "World") diff --git a/test/samples/navigation.clj b/test/samples/navigation.clj new file mode 100644 index 0000000..26bdf44 --- /dev/null +++ b/test/samples/navigation.clj @@ -0,0 +1,14 @@ +(ns navigation) + +(let [my-var ^{:foo "bar"} (= "Hello" "Hello")]) + +(let [my-var ^boolean (= "Hello" "world")]) + +#(+ % %) + +^boolean (= 2 2) + +(defn- to-string + ^String + [arg] + (.toString arg))