summaryrefslogtreecommitdiff
path: root/ext/mbstring/mbregex.h
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mbstring/mbregex.h')
-rw-r--r--ext/mbstring/mbregex.h213
1 files changed, 213 insertions, 0 deletions
diff --git a/ext/mbstring/mbregex.h b/ext/mbstring/mbregex.h
new file mode 100644
index 0000000000..488da07f09
--- /dev/null
+++ b/ext/mbstring/mbregex.h
@@ -0,0 +1,213 @@
+/* Definitions for data structures and routines for the regular
+ expression library, version 0.12.
+ Copyright (C) 1985,89,90,91,92,93,95,96,97,98 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library. Its master source is NOT part of
+ the C library, however. The master source lives in /gd/gnu/lib.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto)
+ Last change: May 21, 1993 by t^2 */
+/* modified for Ruby by matz@netlab.co.jp */
+
+#ifndef __MB_REGEXP_LIBRARY
+#define __MB_REGEXP_LIBRARY
+
+#include <stddef.h>
+
+/* Define number of parens for which we record the beginnings and ends.
+ This affects how much space the `struct re_registers' type takes up. */
+#ifndef MBRE_NREGS
+#define MBRE_NREGS 10
+#endif
+
+#define MBRE_BYTEWIDTH 8
+
+#define MBRE_REG_MAX ((1<<MBRE_BYTEWIDTH)-1)
+
+/* Maximum number of duplicates an interval can allow. */
+#ifndef MBRE_DUP_MAX
+#define MBRE_DUP_MAX ((1 << 15) - 1)
+#endif
+
+
+/* If this bit is set, then character classes are supported; they are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+ If not set, then character classes are not supported. */
+#define MBRE_CHAR_CLASSES (1L << 9)
+
+/* match will be done case insensitively */
+#define MBRE_OPTION_IGNORECASE (1L)
+/* perl-style extended pattern available */
+#define MBRE_OPTION_EXTENDED (MBRE_OPTION_IGNORECASE<<1)
+/* newline will be included for . */
+#define MBRE_OPTION_MULTILINE (MBRE_OPTION_EXTENDED<<1)
+/* ^ and $ ignore newline */
+#define MBRE_OPTION_SINGLELINE (MBRE_OPTION_MULTILINE<<1)
+/* works like Perl's /s; it's called POSIX for the wrong reason */
+#define MBRE_OPTION_POSIXLINE (MBRE_OPTION_MULTILINE|MBRE_OPTION_SINGLELINE)
+/* search for longest match, in accord with POSIX regexp */
+#define MBRE_OPTION_LONGEST (MBRE_OPTION_SINGLELINE<<1)
+
+#define MBRE_MAY_IGNORECASE (MBRE_OPTION_LONGEST<<1)
+#define MBRE_OPTIMIZE_ANCHOR (MBRE_MAY_IGNORECASE<<1)
+#define MBRE_OPTIMIZE_EXACTN (MBRE_OPTIMIZE_ANCHOR<<1)
+#define MBRE_OPTIMIZE_NO_BM (MBRE_OPTIMIZE_EXACTN<<1)
+#define MBRE_OPTIMIZE_BMATCH (MBRE_OPTIMIZE_NO_BM<<1)
+
+/* For multi-byte char support */
+#define MBCTYPE_ASCII 0
+#define MBCTYPE_EUC 1
+#define MBCTYPE_SJIS 2
+#define MBCTYPE_UTF8 3
+
+#if 0
+#if defined IMPORT || defined USEIMPORTLIB
+extern __declspec(dllimport)
+#elif defined EXPORT
+extern __declspec(dllexport)
+#else
+extern
+#endif
+const unsigned char *re_mbctab;
+#if defined(__STDC__)
+void re_mbcinit (int);
+#else
+void re_mbcinit ();
+#endif
+#endif
+
+#undef ismbchar /* NOTE(review): re_mbctab is declared only under "#if 0" above -- it must be provided elsewhere */
+#define ismbchar(c) re_mbctab[(unsigned char)(c)]
+#define mbclen(c) (re_mbctab[(unsigned char)(c)]+1)
+
+/* Register info used in mbre_match(); being a union, `word' and `bits' share the same storage. */
+
+typedef union
+{
+ unsigned char *word;
+ struct {
+ unsigned is_active : 1;
+ unsigned matched_something : 1;
+ } bits;
+} mbre_register_info_type;
+
+/* This data structure is used to represent a compiled pattern. */
+
+struct mbre_pattern_buffer
+ {
+ char *buffer; /* Space holding the compiled pattern commands. */
+ int allocated; /* Size of space that `buffer' points to. */
+ int used; /* Length of portion of buffer actually occupied */
+ char *fastmap; /* Pointer to fastmap, if any, or zero if none. */
+ /* mbre_search uses the fastmap, if there is one,
+ to skip over totally implausible characters. */
+ char *must; /* Pointer to exact pattern which strings should have
+ to be matched. */
+ int *must_skip; /* Pointer to exact pattern skip table for bm_search */
+ char *stclass; /* Pointer to character class list at top */
+ long options; /* Flags for options such as extended_pattern. */
+ long re_nsub; /* Number of subexpressions found by the compiler. */
+ char fastmap_accurate;
+ /* Set to zero when a new pattern is stored,
+ set to one when the fastmap is updated from it. */
+ char can_be_null; /* Set to one by compiling fastmap
+ if this pattern might match the null string.
+ It does not necessarily match the null string
+ in that case, but if this is zero, it cannot.
+ 2 as value means can match null string
+ but at end of range or before a character
+ listed in the fastmap. */
+
+ /* stack & working area for mbre_match() */
+ unsigned char **regstart;
+ unsigned char **regend;
+ unsigned char **old_regstart;
+ unsigned char **old_regend;
+ mbre_register_info_type *reg_info;
+ unsigned char **best_regstart;
+ unsigned char **best_regend;
+
+ int mbctype; /* presumably one of the MBCTYPE_* constants above -- TODO confirm */
+ };
+
+typedef struct mbre_pattern_buffer mb_regex_t;
+
+/* Structure to store register contents data in.
+
+ Pass the address of such a structure as an argument to mbre_match, etc.,
+ if you want this information back.
+
+ For i from 1 to MBRE_NREGS - 1, beg[i] records the starting index in
+ the string of where the ith subexpression matched, and end[i] records
+ one after the ending index. beg[0] and end[0] are analogous, for
+ the entire pattern. */
+
+struct mbre_registers
+ {
+ int allocated;
+ int num_regs;
+ int *beg;
+ int *end;
+ };
+
+#if 0
+/* Type for byte offsets within the string. POSIX mandates this. */
+typedef size_t regoff_t;
+
+/* POSIX specification for registers. Aside from the different names than
+ `mbre_registers', POSIX uses an array of structures, instead of a
+ structure of arrays. */
+typedef struct
+{
+ regoff_t rm_so; /* Byte offset from string's start to substring's start. */
+ regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
+} regmatch_t;
+#endif
+
+
+#ifdef __STDC__ /* ISO C compilers: declare the API with full prototypes */
+
+extern char *mbre_compile_pattern (const char *, int, struct mbre_pattern_buffer *);
+void mbre_free_pattern (struct mbre_pattern_buffer *);
+/* Is this really advertised? */
+extern int mbre_adjust_startpos (struct mbre_pattern_buffer *, const char*, int, int, int);
+extern void mbre_compile_fastmap (struct mbre_pattern_buffer *);
+extern int mbre_search (struct mbre_pattern_buffer *, const char*, int, int, int,
+ struct mbre_registers *);
+extern int mbre_match (struct mbre_pattern_buffer *, const char *, int, int,
+ struct mbre_registers *);
+extern void mbre_set_casetable (const char *table);
+extern void mbre_copy_registers (struct mbre_registers*, struct mbre_registers*);
+extern void mbre_free_registers (struct mbre_registers*);
+
+#else /* !__STDC__: the same API as old-style declarations without parameter lists */
+
+extern char *mbre_compile_pattern ();
+void mbre_free_pattern (); /* name must match the prototype in the __STDC__ branch */
+/* Is this really advertised? */
+extern int mbre_adjust_startpos ();
+extern void mbre_compile_fastmap ();
+extern int mbre_search ();
+extern int mbre_match ();
+extern void mbre_set_casetable ();
+extern void mbre_copy_registers ();
+extern void mbre_free_registers ();
+
+#endif /* __STDC__ */
+
+#endif /* !__MB_REGEXP_LIBRARY */