diff options
author | Burdette Lamar <BurdetteLamar@Yahoo.com> | 2022-04-15 13:31:15 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-15 13:31:15 -0500 |
commit | e021754db013ca9cd6dbd68b416425b32ee81490 (patch) | |
tree | a8dfc116cf5a5f80891a6b62085d844f1a3c56eb | |
parent | 7f81f335478a3ca873f34e3bc0af6927819d3e84 (diff) | |
download | ruby-e021754db013ca9cd6dbd68b416425b32ee81490.tar.gz |
[DOC] Enhanced RDoc for Regexp (#5807)
Treats:
#source
#inspect
#to_s
#casefold?
#options
#names
#named_captures
-rw-r--r-- | doc/regexp.rdoc | 12 | ||||
-rw-r--r-- | re.c | 162 |
2 files changed, 90 insertions, 84 deletions
diff --git a/doc/regexp.rdoc b/doc/regexp.rdoc index 65d8cd46fa..b8efc7e3d4 100644 --- a/doc/regexp.rdoc +++ b/doc/regexp.rdoc @@ -35,7 +35,7 @@ exceeded. See "Timeout" section in detail. Pattern matching may be achieved by using <tt>=~</tt> operator or Regexp#match method. -=== <tt>=~</tt> operator +=== <tt>=~</tt> Operator <tt>=~</tt> is Ruby's basic pattern-matching operator. When one operand is a regular expression and the other is a string then the regular expression is @@ -54,7 +54,7 @@ Using <tt>=~</tt> operator with a String and Regexp the <tt>$~</tt> global variable is set after a successful match. <tt>$~</tt> holds a MatchData object. Regexp.last_match is equivalent to <tt>$~</tt>. -=== Regexp#match method +=== Regexp#match Method The #match method returns a MatchData object: @@ -193,7 +193,7 @@ At least one uppercase character ('H'), at least one lowercase character "Hello".match(/[[:upper:]]+[[:lower:]]+l{2}o/) #=> #<MatchData "Hello"> -=== Greedy match +=== Greedy Match Repetition is <i>greedy</i> by default: as many occurrences as possible are matched while still allowing the overall match to succeed. By @@ -211,7 +211,7 @@ Both patterns below match the string. The first uses a greedy quantifier so /<.+>/.match("<a><b>") #=> #<MatchData "<a><b>"> /<.+?>/.match("<a><b>") #=> #<MatchData "<a>"> -=== Possessive match +=== Possessive Match A quantifier followed by <tt>+</tt> matches <i>possessively</i>: once it has matched it does not backtrack. 
They behave like greedy quantifiers, @@ -256,7 +256,7 @@ this backreference when doing substitution: "The cat sat in the hat".gsub(/[csh]at/, '\0s') # => "The cats sats in the hats" -=== Named captures +=== Named Captures Capture groups can be referred to by name when defined with the <tt>(?<</tt><i>name</i><tt>>)</tt> or <tt>(?'</tt><i>name</i><tt>')</tt> @@ -672,7 +672,7 @@ regexp's encoding can be explicitly fixed by supplying # raises Encoding::CompatibilityError: incompatible encoding regexp match # (ISO-8859-1 regexp with UTF-8 string) -== Special global variables +== Special Global Variables Pattern matching sets some global variables : * <tt>$~</tt> is equivalent to Regexp.last_match; @@ -480,15 +480,15 @@ rb_reg_desc(const char *s, long len, VALUE re) /* * call-seq: - * rxp.source -> str + * source -> string * - * Returns the original string of the pattern. + * Returns the original string of +self+: * - * /ab+c/ix.source #=> "ab+c" + * /ab+c/ix.source # => "ab+c" * - * Note that escape sequences are retained as is. + * Note that escape sequences are retained as is: * - * /\x20\+/.source #=> "\\x20\\+" + * /\x20\+/.source # => "\\x20\\+" * */ @@ -503,15 +503,14 @@ rb_reg_source(VALUE re) } /* - * call-seq: - * rxp.inspect -> string + * call-seq: + * inspect -> string * - * Produce a nicely formatted string-version of _rxp_. Perhaps surprisingly, - * <code>#inspect</code> actually produces the more natural version of - * the string than <code>#to_s</code>. + * Returns a nicely-formatted string representation of +self+: * - * /ab+c/ix.inspect #=> "/ab+c/ix" + * /ab+c/ix.inspect # => "/ab+c/ix" * + * Related: Regexp#to_s. */ static VALUE @@ -527,22 +526,29 @@ static VALUE rb_reg_str_with_term(VALUE re, int term); /* * call-seq: - * rxp.to_s -> str - * - * Returns a string containing the regular expression and its options (using the - * <code>(?opts:source)</code> notation. 
This string can be fed back in to - * Regexp::new to a regular expression with the same semantics as the - * original. (However, <code>Regexp#==</code> may not return true - * when comparing the two, as the source of the regular expression - * itself may differ, as the example shows). Regexp#inspect produces - * a generally more readable version of <i>rxp</i>. - * - * r1 = /ab+c/ix #=> /ab+c/ix - * s1 = r1.to_s #=> "(?ix-m:ab+c)" - * r2 = Regexp.new(s1) #=> /(?ix-m:ab+c)/ - * r1 == r2 #=> false - * r1.source #=> "ab+c" - * r2.source #=> "(?ix-m:ab+c)" + * to_s -> string + * + * Returns a string showing the options and string of +self+: + * + * r0 = /ab+c/ix + * s0 = r0.to_s # => "(?ix-m:ab+c)" + * + * The returned string may be used as an argument to Regexp.new, + * or as interpolated text for a + * {Regexp literal}[rdoc-ref:regexp.rdoc@Regexp+Literal]: + * + * r1 = Regexp.new(s0) # => /(?ix-m:ab+c)/ + * r2 = /#{s0}/ # => /(?ix-m:ab+c)/ + * + * Note that +r1+ and +r2+ are not equal to +r0+ + * because their original strings are different: + * + * r0 == r1 # => false + * r0.source # => "ab+c" + * r1.source # => "(?ix-m:ab+c)" + * + * Related: Regexp#inspect. + * */ static VALUE @@ -713,13 +719,15 @@ rb_reg_raise_str(VALUE str, int options, const char *err) /* * call-seq: - * rxp.casefold? -> true or false + * casefold? -> true or false * - * Returns the value of the case-insensitive flag. + * Returns +true+ if the case-insensitivity flag in +self+ is set, + * +false+ otherwise: + * + * /a/.casefold? # => false + * /a/i.casefold? # => true + * /(?i:a)/.casefold? # => false * - * /a/.casefold? #=> false - * /a/i.casefold? #=> true - * /(?i:a)/.casefold? #=> false */ static VALUE @@ -732,25 +740,39 @@ rb_reg_casefold_p(VALUE re) /* * call-seq: - * rxp.options -> integer + * options -> integer + * + * Returns an integer whose bits show the options set in +self+. 
+ * + * The option bits are: + * + * Regexp::IGNORECASE # => 1 + * Regexp::EXTENDED # => 2 + * Regexp::MULTILINE # => 4 + * + * Examples: + * + * /foo/.options # => 0 + * /foo/i.options # => 1 + * /foo/x.options # => 2 + * /foo/m.options # => 4 + * /foo/mix.options # => 7 + * + * Note that additional bits may be set in the returned integer; + * these are maintained internally in +self+, + * are ignored if passed to Regexp.new, and may be ignored by the caller: * * Returns the set of bits corresponding to the options used when - * creating this Regexp (see Regexp::new for details. Note that + * creating this regexp (see Regexp::new for details). Note that * additional bits may be set in the returned options: these are used * internally by the regular expression code. These extra bits are - * ignored if the options are passed to Regexp::new. + * ignored if the options are passed to Regexp::new: * - * Regexp::IGNORECASE #=> 1 - * Regexp::EXTENDED #=> 2 - * Regexp::MULTILINE #=> 4 + * r = /\xa1\xa2/e # => /\xa1\xa2/ + * r.source # => "\\xa1\\xa2" + * r.options # => 16 + * Regexp.new(r.source, r.options) # => /\xa1\xa2/ * - * /cat/.options #=> 0 - * /cat/ix.options #=> 3 - * Regexp.new('cat', true).options #=> 1 - * /\xa1\xa2/e.options #=> 16 - * - * r = /cat/ix - * Regexp.new(r.source, r.options) #=> /cat/ix */ static VALUE @@ -770,19 +792,16 @@ reg_names_iter(const OnigUChar *name, const OnigUChar *name_end, } /* - * call-seq: - * rxp.names -> [name1, name2, ...] - * - * Returns a list of names of captures as an array of strings. 
+ * call-seq: + * names -> array_of_names * - * /(?<foo>.)(?<bar>.)(?<baz>.)/.names - * #=> ["foo", "bar", "baz"] + * Returns an array of names of captures + * (see {Named Captures}[rdoc-ref:Regexp@Named+Captures]): * - * /(?<foo>.)(?<foo>.)/.names - * #=> ["foo"] + * /(?<foo>.)(?<bar>.)(?<baz>.)/.names # => ["foo", "bar", "baz"] + * /(?<foo>.)(?<foo>.)/.names # => ["foo"] + * /(.)(.)/.names # => [] * - * /(.)(.)/.names - * #=> [] */ static VALUE @@ -812,25 +831,21 @@ reg_named_captures_iter(const OnigUChar *name, const OnigUChar *name_end, } /* - * call-seq: - * rxp.named_captures -> hash - * - * Returns a hash representing information about named captures of <i>rxp</i>. + * call-seq: + * named_captures -> hash * - * A key of the hash is a name of the named captures. - * A value of the hash is an array which is list of indexes of corresponding - * named captures. + * Returns a hash representing named captures of +self+ + * (see {Named Captures}[rdoc-ref:Regexp@Named+Captures]): * - * /(?<foo>.)(?<bar>.)/.named_captures - * #=> {"foo"=>[1], "bar"=>[2]} + * - Each key is the name of a named capture. + * - Each value is an array of integer indexes for that named capture. * - * /(?<foo>.)(?<foo>.)/.named_captures - * #=> {"foo"=>[1, 2]} + * Examples: * - * If there are no named captures, an empty hash is returned. + * /(?<foo>.)(?<bar>.)/.named_captures # => {"foo"=>[1], "bar"=>[2]} + * /(?<foo>.)(?<foo>.)/.named_captures # => {"foo"=>[1, 2]} + * /(.)(.)/.named_captures # => {} * - * /(.)(.)/.named_captures - * #=> {} */ static VALUE @@ -4239,15 +4254,6 @@ rb_reg_timeout_get(VALUE re) /* * Document-class: Regexp * - * A Regexp holds a regular expression, used to match a pattern - * against strings. Regexps are created using the <code>/.../</code> - * and <code>%r{...}</code> literals, and by the Regexp::new - * constructor. - * - * You can create a \Regexp object explicitly with: - * - * - A {regexp literal}[rdoc-ref:syntax/literals.rdoc@Regexp+Literals]. 
- * * :include: doc/regexp.rdoc */ |