1 files changed, 224 insertions, 0 deletions
diff --git a/ext/mbstring/oniguruma/doc/RE b/ext/mbstring/oniguruma/doc/RE
new file mode 100644
index 0000000000..3527b4556f
--- /dev/null
+++ b/ext/mbstring/oniguruma/doc/RE
@@ -0,0 +1,224 @@
+Oniguruma Regular Expressions     2003/07/04
+
+syntax: REG_SYNTAX_RUBY (default)
+
+
+1. Syntax elements
+
+  \       escape
+  |       alternation
+  (...)   group
+  [...]   character class  
+
+
+2. Characters
+
+  \t           horizontal tab (0x09)
+  \v           vertical tab   (0x0B)
+  \n           newline        (0x0A)
+  \r           return         (0x0D)
+  \b           backspace      (0x08) (* in character class only)
+  \f           form feed      (0x0C)
+  \a           bell           (0x07)
+  \e           escape         (0x1B)
+  \nnn         octal char
+  \xHH         hexadecimal char
+  \x{7HHHHHHH} wide hexadecimal char
+  \cx          control char
+  \C-x         control char
+  \M-x         meta  (x|0x80)  
+  \M-\C-x      meta control char
+
+
+3. Character types
+
+  .        any character (except newline)
+  \w       word character (alphanumeric, "_" and multibyte char)
+  \W       non-word char
+  \s       whitespace char (\t, \n, \v, \f, \r, \x20)
+  \S       non-whitespace char
+  \d       digit char
+  \D       non-digit char
+
+
+4. Quantifier
+
+  greedy
+
+  ?       1 or 0 times
+  *       0 or more times
+  +       1 or more times
+  {n,m}   at least n but not more than m times  
+  {n,}    at least n times
+  {n}     n times
+
+  reluctant
+
+  ??      1 or 0 times
+  *?      0 or more times
+  +?      1 or more times
+  {n,m}?  at least n but not more than m times  
+  {n,}?   at least n times
+
+  possessive (greedy, and does not backtrack once it has repeated)
+
+  ?+      1 or 0 times
+  *+      0 or more times
+  ++      1 or more times
+
+
+5. Anchors
+
+  ^       beginning of the line
+  $       end of the line
+  \b      word boundary
+  \B      not word boundary
+  \A      beginning of string
+  \Z      end of string, or before newline at the end
+  \z      end of string
+  \G      previous end-of-match position
+
+
+6. POSIX character class  ([:xxxxx:], negate [:^xxxxx:])
+
+  alnum    alphabet or digit char
+  alpha    alphabet
+  ascii    code value: [0 - 127]
+  blank    \t, \x20
+  cntrl
+  digit    0-9
+  graph
+  lower
+  print
+  punct
+  space    \t, \n, \v, \f, \r, \x20
+  upper
+  xdigit   0-9, a-f, A-F
+
+
+7. Operators in character class
+
+  [...]   group (character class in character class)
+  &&      intersection
+         (lowest precedence operator in character class)
+          
+  ex. [a-w&&[^c-g]z] ==> ([a-w] and ([^c-g] or z)) ==> [abh-w]
+
+
+8. Extended expressions
+
+  (?#...)              comment
+  (?imx-imx)           option on/off
+                         i: ignore case
+                         m: multi-line (dot(.) match newline)
+                         x: extended form
+  (?imx-imx:subexp)    option on/off for subexp
+  (?:subexp)           not captured
+  (?=subexp)           look-ahead
+  (?!subexp)           negative look-ahead
+  (?<=subexp)          look-behind
+  (?<!subexp)          negative look-behind
+
+                       Subexp of look-behind must have a fixed character length.
+                       Alternatives of different character lengths are allowed,
+                       but only at the top level of the look-behind.
+                       ex. (?<=a|bc) is OK. (?<=aaa(?:b|cd)) is not allowed.
+
+  (?>subexp)           don't backtrack
+  (?<name>subexp)      define named group
+                       (name cannot include '>', ')', '\' or the NUL character)
+
+
+9. Back reference
+
+  \n          back reference by group number (n >= 1)
+  \k<name>    back reference by group name
+
+
+10. Subexp call ("Tanaka Akira special")
+
+  \g<name>    call by group name
+  \g<n>       call by group number (only if 'n' is not defined as name)
+
+
+-----------------------------
+11. Original extensions
+
+   + named group     (?<name>...)
+   + named backref   \k<name>
+   + subexp call     \g<name>, \g<group-num>
+
+
+12. Missing features compared with Perl 5.8.0
+
+   + [:word:]
+   + \N{name}
+   + \l,\u,\L,\U, \P, \X, \C
+   + (?{code})
+   + (??{code})
+   + (?(condition)yes-pat|no-pat)
+
+   + \Q...\E   (* This is effective on REG_SYNTAX_PERL and REG_SYNTAX_JAVA)
+
+
+13. Syntax-dependent options
+
+   + REG_SYNTAX_RUBY (default)
+     (?m): dot(.) match newline
+
+   + REG_SYNTAX_PERL, REG_SYNTAX_JAVA
+     (?s):  dot(.) match newline
+     (?m): ^ match after newline, $ match before newline
+
+
+14. Differences from Japanized GNU regex (version 0.12) of Ruby
+
+   + add look behind
+     (?<=fixed-char-length-pattern), (?<!fixed-char-length-pattern)
+     (in negative-look-behind, capture group isn't allowed, 
+      shy group(?:) is allowed.)
+   + add possessive quantifier. ?+, *+, ++
+   + add operations in character class. [], &&
+   + add named group and subexp call.
+   + octal or hexadecimal number sequence can be treated as 
+     a multibyte code char in char-class, if multibyte encoding is specified.
+     (ex. [\xa1\xa2], [\xa1\xa7-\xa4\xa1])
+   + the effective range of an isolated option extends to the next ')'.
+     ex. (?:(?i)a|b) is interpreted as (?:(?i:a|b)), not (?:(?i:a)|b).
+   + isolated option is not transparent to previous pattern.
+     ex. a(?i)* is a syntax error pattern.
+   + an incomplete left brace is allowed as an ordinary char.
+     ex. /{/, /({)/, /a{2,3/ etc...
+   + negative POSIX bracket [:^xxxx:] is supported.
+   + POSIX bracket [:ascii:] is added.
+   + repeat of look-ahead is not allowed.
+     ex. /(?=a)*/, /(?!b){5}/
+
+
+15. Problems
+
+   + Invalid first byte in UTF-8 is allowed.
+     (which is the same as GNU regex of Ruby)
+
+       /./u =~ "\xa3"
+
+     Of course it is possible to validate the byte sequence,
+     but matching would then become slower than it is now.
+
+   + Zero-length match in infinite repeat stops the repeat,
+     and captured group status isn't checked as stop condition.
+
+       /()*\1/ =~ ""            #=> match
+       /(?:()|())*\1\2/ =~ ""   #=> fail
+
+       /(?:\1a|())*/ =~ "a"     #=> match with ""
+
+   + The ignore-case option has no effect on an octal or hexadecimal
+     numbered char, but it does take effect if the char appears in a char class.
+     This is inconsistent, but it is the same behavior as that of
+     GNU regex of Ruby.
+
+       /\x61/i.match("A")     # => nil
+       /[\x61]/i.match("A")   # => match
+
+// END