diff options
author | Mattias Engdegård <mattiase@acm.org> | 2019-09-25 14:29:50 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2019-09-25 14:29:50 -0700 |
commit | 2ed71227c626c6cfdc684948644ccf3d9eaeb15b (patch) | |
tree | 2a4043ce8036206c7138b9bf5b149da8c66ec811 /test | |
parent | a773a6474897356cd78aeea092d2c1a51ede23f9 (diff) | |
download | emacs-2ed71227c626c6cfdc684948644ccf3d9eaeb15b.tar.gz |
New rx implementation
* lisp/emacs-lisp/rx.el:
* test/lisp/emacs-lisp/rx-tests.el:
* doc/lispref/searching.texi (Rx Constructs):
Rewrite rx for correctness, clarity, and performance. The new
implementation retains full compatibility and has more comprehensive
tests.
* lisp/emacs-lisp/re-builder.el (reb-rx-font-lock-keywords):
Adapt to changes in internal variables in rx.el.
Diffstat (limited to 'test')
-rw-r--r-- | test/lisp/emacs-lisp/rx-tests.el | 336 |
1 file changed, 247 insertions, 89 deletions
diff --git a/test/lisp/emacs-lisp/rx-tests.el b/test/lisp/emacs-lisp/rx-tests.el index 8845ebf46d1..fec046dd991 100644 --- a/test/lisp/emacs-lisp/rx-tests.el +++ b/test/lisp/emacs-lisp/rx-tests.el @@ -1,4 +1,4 @@ -;;; rx-tests.el --- test for rx.el functions -*- lexical-binding: t -*- +;;; rx-tests.el --- tests for rx.el -*- lexical-binding: t -*- ;; Copyright (C) 2016-2019 Free Software Foundation, Inc. @@ -17,21 +17,44 @@ ;; You should have received a copy of the GNU General Public License ;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. -;;; Commentary: - (require 'ert) (require 'rx) -;;; Code: +(ert-deftest rx-seq () + (should (equal (rx "a.b" "*" "c") + "a\\.b\\*c")) + (should (equal (rx (seq "a" (: "b" (and "c" (sequence "d" nonl) + "e") + "f") + "g")) + "abcd.efg")) + (should (equal (rx "a$" "b") + "a\\$b")) + (should (equal (rx bol "a" "b" ?c eol) + "^abc$")) + (should (equal (rx "a" "" "b") + "ab")) + (should (equal (rx (seq)) + "")) + (should (equal (rx "" (or "ab" nonl) "") + "ab\\|."))) + +(ert-deftest rx-or () + (should (equal (rx (or "ab" (| "c" nonl) "de")) + "ab\\|c\\|.\\|de")) + (should (equal (rx (or "ab" "abc" "a")) + "\\(?:ab\\|abc\\|a\\)")) + (should (equal (rx (| nonl "a") (| "b" blank)) + "\\(?:.\\|a\\)\\(?:b\\|[[:blank:]]\\)")) + (should (equal (rx (|)) + "\\`a\\`"))) (ert-deftest rx-char-any () "Test character alternatives with `]' and `-' (Bug#25123)." - (should (string-match + (should (equal (rx string-start (1+ (char (?\] . ?\{) (?< . ?\]) (?- . ?:))) string-end) - (apply #'string (nconc (number-sequence ?\] ?\{) - (number-sequence ?< ?\]) - (number-sequence ?- ?:)))))) + "\\`[.-:<-{-]+\\'"))) (ert-deftest rx-char-any-range-nl () "Test character alternatives with LF as a range endpoint." @@ -40,28 +63,72 @@ (should (equal (rx (any "\a-\n")) "[\a-\n]"))) -(ert-deftest rx-char-any-range-bad () - (should-error (rx (any "0-9a-Z"))) - (should-error (rx (any (?0 . ?9) (?a . 
?Z))))) - (ert-deftest rx-char-any-raw-byte () "Test raw bytes in character alternatives." + + ;; The multibyteness of the rx return value sometimes depends on whether + ;; the test had been byte-compiled or not, so we add explicit conversions. + ;; Separate raw characters. - (should (equal (string-match-p (rx (any "\326A\333B")) - "X\326\333") - 1)) + (should (equal (string-to-multibyte (rx (any "\326A\333B"))) + (string-to-multibyte "[AB\326\333]"))) ;; Range of raw characters, unibyte. - (should (equal (string-match-p (rx (any "\200-\377")) - "ÿA\310B") - 2)) + (should (equal (string-to-multibyte (rx (any "\200-\377"))) + (string-to-multibyte "[\200-\377]"))) + ;; Range of raw characters, multibyte. - (should (equal (string-match-p (rx (any "Å\211\326-\377\177")) - "XY\355\177\327") - 2)) + (should (equal (rx (any "Å\211\326-\377\177")) + "[\177Å\211\326-\377]")) ;; Split range; \177-\377ÿ should not be optimised to \177-\377. - (should (equal (string-match-p (rx (any "\177-\377" ?ÿ)) - "ÿA\310B") - 0))) + (should (equal (rx (any "\177-\377" ?ÿ)) + "[\177ÿ\200-\377]"))) + +(ert-deftest rx-any () + (should (equal (rx (any ?A (?C . 
?D) "F-H" "J-L" "M" "N-P" "Q" "RS")) + "[ACDF-HJ-S]")) + (should (equal (rx (in "a!f" ?c) (char "q-z" "0-3") + (not-char "a-e1-5") (not (in "A-M" ?q))) + "[!acf][0-3q-z][^1-5a-e][^A-Mq]")) + (should (equal (rx (any "^") (any "]") (any "-") + (not (any "^")) (not (any "]")) (not (any "-"))) + "\\^]-[^^][^]][^-]")) + (should (equal (rx (any "]" "^") (any "]" "-") (any "-" "^") + (not (any "]" "^")) (not (any "]" "-")) + (not (any "-" "^"))) + "[]^][]-][-^][^]^][^]-][^-^]")) + (should (equal (rx (any "]" "^" "-") (not (any "]" "^" "-"))) + "[]^-][^]^-]")) + (should (equal (rx (any "-" ascii) (any "^" ascii) (any "]" ascii)) + "[[:ascii:]-][[:ascii:]^][][:ascii:]]")) + (should (equal (rx (not (any "-" ascii)) (not (any "^" ascii)) + (not (any "]" ascii))) + "[^[:ascii:]-][^[:ascii:]^][^][:ascii:]]")) + (should (equal (rx (any "-]" ascii) (any "^]" ascii) (any "-^" ascii)) + "[][:ascii:]-][]^[:ascii:]][[:ascii:]^-]")) + (should (equal (rx (not (any "-]" ascii)) (not (any "^]" ascii)) + (not (any "-^" ascii))) + "[^][:ascii:]-][^]^[:ascii:]][^[:ascii:]^-]")) + (should (equal (rx (any "-]^" ascii) (not (any "-]^" ascii))) + "[]^[:ascii:]-][^]^[:ascii:]-]")) + (should (equal (rx (any "^" lower upper) (not (any "^" lower upper))) + "[[:lower:]^[:upper:]][^[:lower:]^[:upper:]]")) + (should (equal (rx (any "-" lower upper) (not (any "-" lower upper))) + "[[:lower:][:upper:]-][^[:lower:][:upper:]-]")) + (should (equal (rx (any "]" lower upper) (not (any "]" lower upper))) + "[][:lower:][:upper:]][^][:lower:][:upper:]]")) + (should (equal (rx (any "-a" "c-" "f-f" "--/*--")) + "[*-/acf]")) + (should (equal (rx (any "]-a" ?-) (not (any "]-a" ?-))) + "[]-a-][^]-a-]")) + (should (equal (rx (any "--]") (not (any "--]")) + (any "-" "^-a") (not (any "-" "^-a"))) + "[].-\\-][^].-\\-][-^-a][^-^-a]")) + (should (equal (rx (not (any "!a" "0-8" digit nonascii))) + "[^!0-8a[:digit:][:nonascii:]]")) + (should (equal (rx (any) (not (any))) + "\\`a\\`\\(?:.\\|\n\\)")) + (should (equal (rx (any 
"") (not (any ""))) + "\\`a\\`\\(?:.\\|\n\\)"))) (ert-deftest rx-pcase () (should (equal (pcase "a 1 2 3 1 1 b" @@ -71,7 +138,11 @@ (backref u) space (backref 1)) (list u v))) - '("1" "3")))) + '("1" "3"))) + (let ((k "blue")) + (should (equal (pcase "<blue>" + ((rx "<" (literal k) ">") 'ok)) + 'ok)))) (ert-deftest rx-kleene () "Test greedy and non-greedy repetition operators." @@ -94,71 +165,158 @@ (should (equal (rx (maximal-match (seq (* "a") (+ "b") (\? "c") (?\s "d") (*? "e") (+? "f") (\?? "g") (?? "h")))) - "a*b+c?d?e*?f+?g??h??"))) + "a*b+c?d?e*?f+?g??h??")) + (should (equal (rx "a" (*) (+ (*)) (? (*) (+)) "b") + "ab"))) -(ert-deftest rx-or () - ;; Test or-pattern reordering (Bug#34641). - (let ((s "abc")) - (should (equal (and (string-match (rx (or "abc" "ab" "a")) s) - (match-string 0 s)) - "abc")) - (should (equal (and (string-match (rx (or "ab" "abc" "a")) s) - (match-string 0 s)) - "ab")) - (should (equal (and (string-match (rx (or "a" "ab" "abc")) s) - (match-string 0 s)) - "a"))) - ;; Test zero-argument `or'. 
- (should (equal (rx (or)) regexp-unmatchable))) +(ert-deftest rx-repeat () + (should (equal (rx (= 3 "a") (>= 51 "b") + (** 2 11 "c") (repeat 6 "d") (repeat 4 8 "e")) + "a\\{3\\}b\\{51,\\}c\\{2,11\\}d\\{6\\}e\\{4,8\\}")) + (should (equal (rx (= 0 "k") (>= 0 "l") (** 0 0 "m") (repeat 0 "n") + (repeat 0 0 "o")) + "k\\{0\\}l\\{0,\\}m\\{0\\}n\\{0\\}o\\{0\\}")) + (should (equal (rx (opt (0+ "a"))) + "\\(?:a*\\)?")) + (should (equal (rx (opt (= 4 "a"))) + "a\\{4\\}?")) + (should (equal (rx "a" (** 3 7) (= 4) (>= 3) (= 4 (>= 7) (= 2)) "b") + "ab"))) + +(ert-deftest rx-atoms () + (should (equal (rx anything) + ".\\|\n")) + (should (equal (rx line-start not-newline nonl any line-end) + "^...$")) + (should (equal (rx bol string-start string-end buffer-start buffer-end + bos eos bot eot eol) + "^\\`\\'\\`\\'\\`\\'\\`\\'$")) + (should (equal (rx point word-start word-end bow eow symbol-start symbol-end + word-boundary not-word-boundary not-wordchar) + "\\=\\<\\>\\<\\>\\_<\\_>\\b\\B\\W")) + (should (equal (rx digit numeric num control cntrl) + "[[:digit:]][[:digit:]][[:digit:]][[:cntrl:]][[:cntrl:]]")) + (should (equal (rx hex-digit hex xdigit blank) + "[[:xdigit:]][[:xdigit:]][[:xdigit:]][[:blank:]]")) + (should (equal (rx graph graphic print printing) + "[[:graph:]][[:graph:]][[:print:]][[:print:]]")) + (should (equal (rx alphanumeric alnum letter alphabetic alpha) + "[[:alnum:]][[:alnum:]][[:alpha:]][[:alpha:]][[:alpha:]]")) + (should (equal (rx ascii nonascii lower lower-case) + "[[:ascii:]][[:nonascii:]][[:lower:]][[:lower:]]")) + (should (equal (rx punctuation punct space whitespace white) + "[[:punct:]][[:punct:]][[:space:]][[:space:]][[:space:]]")) + (should (equal (rx upper upper-case word wordchar) + "[[:upper:]][[:upper:]][[:word:]][[:word:]]")) + (should (equal (rx unibyte multibyte) + "[[:unibyte:]][[:multibyte:]]"))) + +(ert-deftest rx-syntax () + (should (equal (rx (syntax whitespace) (syntax punctuation) + (syntax word) (syntax symbol) + (syntax 
open-parenthesis) (syntax close-parenthesis)) + "\\s-\\s.\\sw\\s_\\s(\\s)")) + (should (equal (rx (syntax string-quote) (syntax paired-delimiter) + (syntax escape) (syntax character-quote) + (syntax comment-start) (syntax comment-end) + (syntax string-delimiter) (syntax comment-delimiter)) + "\\s\"\\s$\\s\\\\s/\\s<\\s>\\s|\\s!"))) + +(ert-deftest rx-category () + (should (equal (rx (category space-for-indent) (category base) + (category consonant) (category base-vowel) + (category upper-diacritical-mark) + (category lower-diacritical-mark) + (category tone-mark) (category symbol) + (category digit) + (category vowel-modifying-diacritical-mark) + (category vowel-sign) (category semivowel-lower) + (category not-at-end-of-line) + (category not-at-beginning-of-line)) + "\\c \\c.\\c0\\c1\\c2\\c3\\c4\\c5\\c6\\c7\\c8\\c9\\c<\\c>")) + (should (equal (rx (category alpha-numeric-two-byte) + (category chinese-two-byte) (category greek-two-byte) + (category japanese-hiragana-two-byte) + (category indian-two-byte) + (category japanese-katakana-two-byte) + (category strong-left-to-right) + (category korean-hangul-two-byte) + (category strong-right-to-left) + (category cyrillic-two-byte) + (category combining-diacritic)) + "\\cA\\cC\\cG\\cH\\cI\\cK\\cL\\cN\\cR\\cY\\c^")) + (should (equal (rx (category ascii) (category arabic) (category chinese) + (category ethiopic) (category greek) (category korean) + (category indian) (category japanese) + (category japanese-katakana) (category latin) + (category lao) (category tibetan)) + "\\ca\\cb\\cc\\ce\\cg\\ch\\ci\\cj\\ck\\cl\\co\\cq")) + (should (equal (rx (category japanese-roman) (category thai) + (category vietnamese) (category hebrew) + (category cyrillic) (category can-break)) + "\\cr\\ct\\cv\\cw\\cy\\c|")) + (should (equal (rx (category ?g) (not (category ?~))) + "\\cg\\C~"))) + +(ert-deftest rx-not () + (should (equal (rx (not word-boundary)) + "\\B")) + (should (equal (rx (not ascii) (not lower-case) (not wordchar)) + 
"[^[:ascii:]][^[:lower:]][^[:word:]]")) + (should (equal (rx (not (syntax punctuation)) (not (syntax escape))) + "\\S.\\S\\")) + (should (equal (rx (not (category tone-mark)) (not (category lao))) + "\\C4\\Co"))) + +(ert-deftest rx-group () + (should (equal (rx (group nonl) (submatch "x") + (group-n 3 "y") (submatch-n 13 "z") (backref 1)) + "\\(.\\)\\(x\\)\\(?3:y\\)\\(?13:z\\)\\1")) + (should (equal (rx (group) (group-n 2)) + "\\(\\)\\(?2:\\)"))) + +(ert-deftest rx-regexp () + (should (equal (rx (regexp "abc") (regex "[de]")) + "\\(?:abc\\)[de]")) + (let ((x "a*")) + (should (equal (rx (regexp x) "b") + "\\(?:a*\\)b")) + (should (equal (rx "" (regexp x) (eval "")) + "a*")))) + +(ert-deftest rx-eval () + (should (equal (rx (eval (list 'syntax 'symbol))) + "\\s_")) + (should (equal (rx "a" (eval (concat)) "b") + "ab"))) + +(ert-deftest rx-literal () + (should (equal (rx (literal (char-to-string 42)) nonl) + "\\*.")) + (let ((x "a+b")) + (should (equal (rx (opt (literal (upcase x)))) + "\\(?:A\\+B\\)?")))) + +(ert-deftest rx-to-string () + (should (equal (rx-to-string '(or nonl "\nx")) + "\\(?:.\\|\nx\\)")) + (should (equal (rx-to-string '(or nonl "\nx") t) + ".\\|\nx"))) + + +(ert-deftest rx-constituents () + (let ((rx-constituents + (append '((beta . gamma) + (gamma . "a*b") + (delta . ((lambda (form) + (regexp-quote (format "<%S>" form))) + 1 nil symbolp)) + (epsilon . delta)) + rx-constituents))) + (should (equal (rx-to-string '(seq (+ beta) nonl gamma) t) + "\\(?:a*b\\)+.\\(?:a*b\\)")) + (should (equal (rx-to-string '(seq (delta a b c) (* (epsilon d e))) t) + "\\(?:<(delta a b c)>\\)\\(?:<(epsilon d e)>\\)*")))) -(ert-deftest rx-seq () - ;; Test zero-argument `seq'. 
- (should (equal (rx (seq)) ""))) - -(defmacro rx-tests--match (regexp string &optional match) - (macroexp-let2 nil strexp string - `(ert-info ((format "Matching %S to %S" ',regexp ,strexp)) - (should (string-match ,regexp ,strexp)) - ,@(when match - `((should (equal (match-string 0 ,strexp) ,match))))))) - -(ert-deftest rx-nonstring-expr () - (let ((bee "b") - (vowel "[aeiou]")) - (rx-tests--match (rx "a" (literal bee) "c") "abc") - (rx-tests--match (rx "a" (regexp bee) "c") "abc") - (rx-tests--match (rx "a" (or (regexp bee) "xy") "c") "abc") - (rx-tests--match (rx "a" (or "xy" (regexp bee)) "c") "abc") - (should-not (string-match (rx (or (regexp bee) "xy")) "")) - (rx-tests--match (rx "a" (= 3 (regexp bee)) "c") "abbbc") - (rx-tests--match (rx "x" (= 3 (regexp vowel)) "z") "xeoez") - (should-not (string-match (rx "x" (= 3 (regexp vowel)) "z") "xe[]z")) - (rx-tests--match (rx "x" (= 3 (literal vowel)) "z") - "x[aeiou][aeiou][aeiou]z") - (rx-tests--match (rx "x" (repeat 1 (regexp vowel)) "z") "xaz") - (rx-tests--match (rx "x" (repeat 1 2 (regexp vowel)) "z") "xaz") - (rx-tests--match (rx "x" (repeat 1 2 (regexp vowel)) "z") "xauz") - (rx-tests--match (rx "x" (>= 1 (regexp vowel)) "z") "xaiiz") - (rx-tests--match (rx "x" (** 1 2 (regexp vowel)) "z") "xaiz") - (rx-tests--match (rx "x" (group (regexp vowel)) "z") "xaz") - (rx-tests--match (rx "x" (group-n 1 (regexp vowel)) "z") "xaz") - (rx-tests--match (rx "x" (? (regexp vowel)) "z") "xz"))) - -(ert-deftest rx-nonstring-expr-non-greedy () - "`rx's greediness can't affect runtime regexp parts." 
- (let ((ad-min "[ad]*?") - (ad-max "[ad]*") - (ad "[ad]")) - (rx-tests--match (rx "c" (regexp ad-min) "a") "cdaaada" "cda") - (rx-tests--match (rx "c" (regexp ad-max) "a") "cdaaada" "cdaaada") - (rx-tests--match (rx "c" (minimal-match (regexp ad-max)) "a") "cdaaada" "cdaaada") - (rx-tests--match (rx "c" (maximal-match (regexp ad-min)) "a") "cdaaada" "cda") - (rx-tests--match (rx "c" (minimal-match (0+ (regexp ad))) "a") "cdaaada" "cda") - (rx-tests--match (rx "c" (maximal-match (0+ (regexp ad))) "a") "cdaaada" "cdaaada"))) - -(ert-deftest rx-to-string-lisp-forms () - (rx-tests--match (rx-to-string '(seq "a" (literal "b") "c")) "abc") - (rx-tests--match (rx-to-string '(seq "a" (regexp "b") "c")) "abc")) (provide 'rx-tests) -;; rx-tests.el ends here. |