summaryrefslogtreecommitdiff
path: root/packages/regexpr
diff options
context:
space:
mode:
authormichael <michael@3ad0048d-3df7-0310-abae-a5850022a9f2>2020-01-18 15:35:25 +0000
committermichael <michael@3ad0048d-3df7-0310-abae-a5850022a9f2>2020-01-18 15:35:25 +0000
commit49fef4ec613bba3186721611463bdb982d798ca2 (patch)
tree1894b71844f4dde28ef37902adb6b0a621f9ff46 /packages/regexpr
parent5ee44c94186ce59580aced1d9831ebf05e5e7938 (diff)
downloadfpc-49fef4ec613bba3186721611463bdb982d798ca2.tar.gz
* Fix bug ID 0036482, various fixes by Alexey T.
git-svn-id: https://svn.freepascal.org/svn/fpc/trunk@43972 3ad0048d-3df7-0310-abae-a5850022a9f2
Diffstat (limited to 'packages/regexpr')
-rw-r--r--packages/regexpr/src/regexpr.pas7899
1 files changed, 4484 insertions, 3415 deletions
diff --git a/packages/regexpr/src/regexpr.pas b/packages/regexpr/src/regexpr.pas
index 11ac1ac3e7..4d13c153e3 100644
--- a/packages/regexpr/src/regexpr.pas
+++ b/packages/regexpr/src/regexpr.pas
@@ -1,55 +1,52 @@
unit RegExpr;
{
- TRegExpr class library
- Delphi Regular Expressions
-
- Copyright (c) 1999-2004 Andrey V. Sorokin, St.Petersburg, Russia
-
- You can choose to use this Pascal unit in one of the two following licenses:
-
- Option 1>
-
- You may use this software in any kind of development,
- including comercial, redistribute, and modify it freely,
- under the following restrictions :
- 1. This software is provided as it is, without any kind of
- warranty given. Use it at Your own risk.The author is not
- responsible for any consequences of use of this software.
- 2. The origin of this software may not be mispresented, You
- must not claim that You wrote the original software. If
- You use this software in any kind of product, it would be
- appreciated that there in a information box, or in the
- documentation would be an acknowledgement like
-
- Partial Copyright (c) 2004 Andrey V. Sorokin
- http://RegExpStudio.com
- mailto:anso@mail.ru
-
- 3. You may not have any income from distributing this source
- (or altered version of it) to other developers. When You
- use this product in a comercial package, the source may
- not be charged seperatly.
- 4. Altered versions must be plainly marked as such, and must
- not be misrepresented as being the original software.
- 5. RegExp Studio application and all the visual components as
- well as documentation is not part of the TRegExpr library
- and is not free for usage.
-
- mailto:anso@mail.ru
- http://RegExpStudio.com
- http://anso.da.ru/
+ TRegExpr class library
+ Delphi Regular Expressions
+
+ Copyright (c) 1999-2004 Andrey V. Sorokin, St.Petersburg, Russia
+
+ You can choose to use this Pascal unit in one of the two following licenses:
+
+ Option 1>
+
+ You may use this software in any kind of development,
+ including comercial, redistribute, and modify it freely,
+ under the following restrictions :
+ 1. This software is provided as it is, without any kind of
+ warranty given. Use it at Your own risk.The author is not
+ responsible for any consequences of use of this software.
+ 2. The origin of this software may not be mispresented, You
+ must not claim that You wrote the original software. If
+ You use this software in any kind of product, it would be
+ appreciated that there in a information box, or in the
+ documentation would be an acknowledgement like
+
+ Partial Copyright (c) 2004 Andrey V. Sorokin
+ https://sorokin.engineer/
+ andrey@sorokin.engineer
+
+ 3. You may not have any income from distributing this source
+ (or altered version of it) to other developers. When You
+ use this product in a comercial package, the source may
+ not be charged seperatly.
+ 4. Altered versions must be plainly marked as such, and must
+ not be misrepresented as being the original software.
+ 5. RegExp Studio application and all the visual components as
+ well as documentation is not part of the TRegExpr library
+ and is not free for usage.
+
+ https://sorokin.engineer/
+ andrey@sorokin.engineer
Option 2>
The same modified LGPL with static linking exception as the Free Pascal RTL
}
-
interface
-{off $DEFINE DebugSynRegExpr}
-{$DEFINE UnicodeWordDetection}
+{ off $DEFINE DebugSynRegExpr }
{$MODE DELPHI} // Delphi-compatible mode in FreePascal
{$INLINE ON}
@@ -58,175 +55,183 @@ interface
{$BOOLEVAL OFF}
{$EXTENDEDSYNTAX ON}
{$LONGSTRINGS ON}
+{$OPTIMIZATION ON}
// ======== Define options for TRegExpr engine
-{.$DEFINE UniCode} // Unicode support
-{$ifdef FPC_OS_UNICODE}
- {$define UNICODE}
-{$endif}
-{$DEFINE RegExpPCodeDump} // p-code dumping (see Dump method)
-{$DEFINE ComplexBraces} // support braces in complex cases
-{$IFNDEF UniCode} // the option applicable only for non-UniCode mode
- {$IFNDEF FPC_REQUIRES_PROPER_ALIGNMENT} //sets have to be aligned
- {$DEFINE UseSetOfChar} // Significant optimization by using set of char
- {$ENDIF}
-{$ENDIF}
-{$IFDEF UseSetOfChar}
- {$DEFINE UseFirstCharSet} // Fast skip between matches for r.e. that starts with determined set of chars
-{$ENDIF}
-{$IFNDEF UNICODE}
-{$UNDEF UnicodeWordDetection}
+{$DEFINE UseFirstCharSet} // Enable optimization, which finds possible first chars of input string
+{$DEFINE RegExpPCodeDump} // Enable method Dump() to show opcode as string
+{$DEFINE ComplexBraces} // Support braces in complex cases
+{$IFNDEF UniCode}
+ {$UNDEF UnicodeWordDetection}
+{$ELSE}
+ {$DEFINE UnicodeWordDetection}
{$ENDIF}
-// ======== Define Pascal-language options
-// Define 'UseAsserts' option (do not edit this definitions).
-// Asserts used to catch 'strange bugs' in TRegExpr implementation (when something goes
-// completely wrong). You can swith asserts on/off with help of {$C+}/{$C-} compiler options.
-{$IFDEF FPC} {$DEFINE UseAsserts} {$ENDIF}
-// Define 'OverMeth' options, to use method overloading (do not edit this definitions).
-{$IFDEF FPC} {$DEFINE OverMeth} {$ENDIF}
-
uses
- Classes, // TStrings in Split method
- SysUtils; // Exception
+ Classes, // TStrings in Split method
+ SysUtils; // Exception
type
- {$IFDEF UniCode}
- PRegExprChar = PWideChar;
- RegExprString = UnicodeString;
- REChar = WideChar;
- {$ELSE}
- PRegExprChar = PChar;
- RegExprString = AnsiString; //###0.952 was string
- REChar = Char;
- {$ENDIF}
- TREOp = REChar; // internal p-code type //###0.933
- PREOp = ^TREOp;
- TRENextOff = PtrInt; // internal Next "pointer" (offset to current p-code) //###0.933
- PRENextOff = ^TRENextOff; // used for extracting Next "pointers" from compiled r.e. //###0.933
- TREBracesArg = integer; // type of {m,n} arguments
- PREBracesArg = ^TREBracesArg;
-
-const
- REOpSz = SizeOf (TREOp) div SizeOf (REChar); // size of p-code in RegExprString units
- {$IFDEF FPC_REQUIRES_PROPER_ALIGNMENT}
- // add space for aligning pointer
- // -1 is the correct max size but also needed for InsertOperator that needs a multiple of pointer size
- RENextOffSz = (2 * SizeOf (TRENextOff) div SizeOf (REChar))-1;
- REBracesArgSz = (2 * SizeOf (TREBracesArg) div SizeOf (REChar)); // add space for aligning pointer
- {$ELSE}
- RENextOffSz = (SizeOf (TRENextOff) div SizeOf (REChar)); // size of Next 'pointer' -"-
- REBracesArgSz = SizeOf (TREBracesArg) div SizeOf (REChar); // size of BRACES arguments -"-
- {$ENDIF}
+ {$IFDEF UniCode}
+ PRegExprChar = PWideChar;
+ RegExprString = UnicodeString;
+ REChar = WideChar;
+ {$ELSE}
+ PRegExprChar = PChar;
+ RegExprString = AnsiString; // ###0.952 was string
+ REChar = Char;
+ {$ENDIF}
+ TREOp = REChar; // internal p-code type //###0.933
+ PREOp = ^TREOp;
type
- TRegExprInvertCaseFunction = function (const Ch : REChar) : REChar
- of object;
+ TRegExprInvertCaseFunction = function(const Ch: REChar): REChar of object;
+ TRegExprCharset = set of byte;
const
- EscChar = '\'; // 'Escape'-char ('\' in common r.e.) used for escaping metachars (\w, \d etc).
- RegExprModifierI : boolean = False; // default value for ModifierI
- RegExprModifierR : boolean = True; // default value for ModifierR
- RegExprModifierS : boolean = True; // default value for ModifierS
- RegExprModifierG : boolean = True; // default value for ModifierG
- RegExprModifierM : boolean = False; // default value for ModifierM
- RegExprModifierX : boolean = False; // default value for ModifierX
- RegExprSpaceChars : RegExprString = // default value for SpaceChars
- ' '#$9#$A#$D#$C;
- RegExprWordChars : RegExprString = // default value for WordChars
- '0123456789' //###0.940
- + 'abcdefghijklmnopqrstuvwxyz'
- + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ_';
- RegExprLineSeparators : RegExprString =// default value for LineSeparators
- #$d#$a{$IFDEF UniCode}+#$b#$c#$2028#$2029#$85{$ENDIF}; //###0.947
- RegExprLinePairedSeparator : RegExprString =// default value for LinePairedSeparator
- #$d#$a;
+ // Escape char ('\' in common r.e.) used for escaping metachars (\w, \d etc)
+ EscChar = '\';
+
+ RegExprModifierI: boolean = False; // default value for ModifierI
+ RegExprModifierR: boolean = True; // default value for ModifierR
+ RegExprModifierS: boolean = True; // default value for ModifierS
+ RegExprModifierG: boolean = True; // default value for ModifierG
+ RegExprModifierM: boolean = False; // default value for ModifierM
+ RegExprModifierX: boolean = False; // default value for ModifierX
+
+ // default value for SpaceChars
+ RegExprSpaceChars: RegExprString = ' '#$9#$A#$D#$C;
+
+ // default value for WordChars
+ RegExprWordChars: RegExprString = '0123456789'
+ + 'abcdefghijklmnopqrstuvwxyz'
+ + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ_';
+
+ // default value for LineSeparators
+ RegExprLineSeparators: RegExprString = #$d#$a#$b#$c
+ {$IFDEF UniCode}
+ + #$2028#$2029#$85
+ {$ENDIF};
+
+ // default value for LinePairedSeparator
+ RegExprLinePairedSeparator: RegExprString = #$d#$a;
{ if You need Unix-styled line separators (only \n), then use:
- RegExprLineSeparators = #$a;
- RegExprLinePairedSeparator = '';
+ RegExprLineSeparators = #$a;
+ RegExprLinePairedSeparator = '';
}
+ // Tab and Unicode category "Space Separator":
+ // https://www.compart.com/en/unicode/category/Zs
+ RegExprHorzSeparators: RegExprString = #9#$20#$A0
+ {$IFDEF UniCode}
+ + #$1680#$2000#$2001#$2002#$2003#$2004#$2005#$2006#$2007#$2008#$2009#$200A#$202F#$205F#$3000
+ {$ENDIF};
const
- NSUBEXP = 90; // max number of subexpression //###0.929
- // Cannot be more than NSUBEXPMAX
- // Be carefull - don't use values which overflow CLOSE opcode
- // (in this case you'll get compiler error).
- // Big NSUBEXP will cause more slow work and more stack required
- NSUBEXPMAX = 255; // Max possible value for NSUBEXP. //###0.945
- // Don't change it! It's defined by internal TRegExpr design.
-
- MaxBracesArg = $7FFFFFFF - 1; // max value for {n,m} arguments //###0.933
-
- {$IFDEF ComplexBraces}
- LoopStackMax = 10; // max depth of loops stack //###0.925
- {$ENDIF}
-
- TinySetLen = 3;
- // if range includes more then TinySetLen chars, //###0.934
- // then use full (32 bytes) ANYOFFULL instead of ANYOF[BUT]TINYSET
- // !!! Attension ! If you change TinySetLen, you must
- // change code marked as "//!!!TinySet"
+ NSUBEXP = 90; // max number of subexpression //###0.929
+ // Cannot be more than NSUBEXPMAX
+ // Be carefull - don't use values which overflow CLOSE opcode
+ // (in this case you'll get compiler error).
+ // Big NSUBEXP will cause more slow work and more stack required
+ NSUBEXPMAX = 255; // Max possible value for NSUBEXP. //###0.945
+ // Don't change it! It's defined by internal TRegExpr design.
+
+{$IFDEF ComplexBraces}
+const
+ LoopStackMax = 10; // max depth of loops stack //###0.925
+type
+ TRegExprLoopStack = array [1 .. LoopStackMax] of integer;
+{$ENDIF}
type
+ TRegExprModifiers = record
+ I: boolean;
+ // Case-insensitive.
+ R: boolean;
+ // Extended syntax for Russian ranges in [].
+ // If True, then а-я additionally includes letter 'ё',
+ // А-Я additionally includes 'Ё', and а-Я includes all Russian letters.
+ // Turn it off if it interferes with your national alphabet.
+ S: boolean;
+ // Dot '.' matches any char, otherwise only [^\n].
+ G: boolean;
+ // Greedy. Switching it off switches all operators to non-greedy style,
+ // so if G=False, then '*' works like '*?', '+' works like '+?' and so on.
+ M: boolean;
+ // Treat string as multiple lines. It changes `^' and `$' from
+ // matching at only the very start/end of the string to the start/end
+ // of any line anywhere within the string.
+ X: boolean;
+ // Allow comments in regex using # char.
+ end;
-{$IFDEF UseSetOfChar}
- PSetOfREChar = ^TSetOfREChar;
- TSetOfREChar = set of REChar;
-{$ENDIF}
+function IsModifiersEqual(const A, B: TRegExprModifiers): boolean;
- TRegExpr = class;
+type
+ TRegExpr = class;
+ TRegExprReplaceFunction = function(ARegExpr: TRegExpr): RegExprString of object;
+ TRegExprCharChecker = function(ch: REChar): boolean of object;
+ TRegExprCharCheckerArray = array[0 .. 30] of TRegExprCharChecker;
+ TRegExprCharCheckerInfo = record
+ CharBegin, CharEnd: REChar;
+ CheckerIndex: integer;
+ end;
+ TRegExprCharCheckerInfos = array of TRegExprCharCheckerInfo;
- TRegExprReplaceFunction = function (ARegExpr : TRegExpr): RegExprString of object;
+ { TRegExpr }
- { TRegExpr }
+ TRegExpr = class
+ private
+ startp: array [0 .. NSUBEXP - 1] of PRegExprChar; // found expr start points
+ endp: array [0 .. NSUBEXP - 1] of PRegExprChar; // found expr end points
- TRegExpr = class
- private
- FUseOsLineEndOnReplace: Boolean;
- startp : array [0 .. NSUBEXP - 1] of PRegExprChar; // founded expr starting points
- endp : array [0 .. NSUBEXP - 1] of PRegExprChar; // founded expr end points
+ GrpIndexes: array [0 .. NSUBEXP - 1] of integer;
+ GrpCount: integer;
{$IFDEF ComplexBraces}
- LoopStack : array [1 .. LoopStackMax] of integer; // state before entering loop
- LoopStackIdx : integer; // 0 - out of all loops
+ LoopStack: TRegExprLoopStack; // state before entering loop
+ LoopStackIdx: integer; // 0 - out of all loops
{$ENDIF}
// The "internal use only" fields to pass info from compile
// to execute that permits the execute phase to run lots faster on
// simple cases.
- regstart : REChar; // char that must begin a match; '\0' if none obvious
- reganch : REChar; // is the match anchored (at beginning-of-line only)?
- regmust : PRegExprChar; // string (pointer into program) that match must include, or nil
- regmlen : PtrInt; // length of regmust string
- // Regstart and reganch permit very fast decisions on suitable starting points
+ reganchored: REChar; // is the match anchored (at beginning-of-line only)?
+ regmust: PRegExprChar; // string (pointer into program) that match must include, or nil
+ regmustlen: integer; // length of regmust string
+ regmustString: RegExprString;
+ // reganchored permits very fast decisions on suitable starting points
// for a match, cutting down the work a lot. Regmust permits fast rejection
// of lines that cannot possibly match. The regmust tests are costly enough
// that regcomp() supplies a regmust only if the r.e. contains something
// potentially expensive (at present, the only such thing detected is * or +
- // at the start of the r.e., which can involve a lot of backup). Regmlen is
+ // at the start of the r.e., which can involve a lot of backup). regmustlen is
// supplied because the test in regexec() needs it and regcomp() is computing
// it anyway.
- {$IFDEF UseFirstCharSet} //###0.929
- FirstCharSet : TSetOfREChar;
+
+ {$IFDEF UseFirstCharSet}
+ FirstCharSet: TRegExprCharset;
+ FirstCharArray: array[byte] of boolean;
{$ENDIF}
- // work variables for Exec's routins - save stack in recursion}
- reginput : PRegExprChar; // String-input pointer.
- fInputStart : PRegExprChar; // Pointer to first char of input string.
- fInputEnd : PRegExprChar; // Pointer to char AFTER last char of input string
+ // work variables for Exec routines - save stack in recursion
+ reginput: PRegExprChar; // String-input pointer.
+ fInputStart: PRegExprChar; // Pointer to first char of input string.
+ fInputEnd: PRegExprChar; // Pointer to char AFTER last char of input string
+ fRegexStart: PRegExprChar;
+ fRegexEnd: PRegExprChar;
// work variables for compiler's routines
- regparse : PRegExprChar; // Input-scan pointer.
- regnpar : PtrInt; // count.
- regdummy : char;
- regcode : PRegExprChar; // Code-emit pointer; @regdummy = don't.
- regsize : PtrInt; // Code size.
-
- regexpbeg : PRegExprChar; // only for error handling. Contains
- // pointer to beginning of r.e. while compiling
- fExprIsCompiled : boolean; // true if r.e. successfully compiled
+ regparse: PRegExprChar; // Input-scan pointer.
+ regnpar: integer; // Count of () brackets.
+ regdummy: REChar;
+ regcode: PRegExprChar; // Code-emit pointer; @regdummy = don't.
+ regsize: integer; // Total programm size in REChars.
+ regExactlyLen: PLongInt;
+ regexpBegin: PRegExprChar; // only for error handling. Contains pointer to beginning of r.e. while compiling
+ regexpIsCompiled: boolean; // true if r.e. successfully compiled
+ fSecondPass: boolean;
// programm is essentially a linear encoding
// of a nondeterministic finite-state machine (aka syntax charts or
@@ -241,289 +246,305 @@ type
// particular, the operand of a BRANCH node is the first node of the branch.
// (NB this is *not* a tree structure: the tail of the branch connects
// to the thing following the set of BRANCHes.) The opcodes are:
- programm : PRegExprChar; // Unwarranted chumminess with compiler.
+ programm: PRegExprChar; // Unwarranted chumminess with compiler.
+
+ fExpression: RegExprString; // source of compiled r.e.
+ fInputString: RegExprString; // input string
+ fLastError: integer; // see Error, LastError
+ fLastErrorOpcode: TREOp;
+
+ fModifiers: TRegExprModifiers; // modifiers
+ fCompModifiers: TRegExprModifiers; // compiler's copy of modifiers
+ fProgModifiers: TRegExprModifiers; // modifiers values from last programm compilation
- fExpression : PRegExprChar; // source of compiled r.e.
- fInputString : PRegExprChar; // input string
+ fSpaceChars: RegExprString;
+ fWordChars: RegExprString;
+ fInvertCase: TRegExprInvertCaseFunction;
- fLastError : integer; // see Error, LastError
+ fLineSeparators: RegExprString;
+ fLinePairedSeparatorAssigned: boolean;
+ fLinePairedSeparatorHead, fLinePairedSeparatorTail: REChar;
- fModifiers : integer; // modifiers
- fCompModifiers : integer; // compiler's copy of modifiers
- fProgModifiers : integer; // modifiers values from last programm compilation
+ FReplaceLineEnd: RegExprString; // string to use for "\n" in Substitute method
+ FUseOsLineEndOnReplace: boolean; // use OS LineBreak chars (LF or CRLF) for FReplaceLineEnd
- fSpaceChars : RegExprString; //###0.927
- fWordChars : RegExprString; //###0.929
- fInvertCase : TRegExprInvertCaseFunction; //###0.927
+ fSlowChecksSizeMax: integer;
+ // use ASlowChecks=True in Exec() only when Length(InputString)<SlowChecksSizeMax
+ // ASlowChecks enables to use regmustString optimization
- fLineSeparators : RegExprString; //###0.941
- fLinePairedSeparatorAssigned : boolean;
- fLinePairedSeparatorHead,
- fLinePairedSeparatorTail : REChar;
- FReplaceLineEnd: String;
{$IFNDEF UniCode}
- fLineSeparatorsSet : set of REChar;
+ fLineSepArray: array[byte] of boolean;
{$ENDIF}
{$IFDEF UnicodeWordDetection}
- FUseUnicodeWordDetection : Boolean;
- function IsUnicodeWordChar(AChar : REChar) : Boolean;
+ FUseUnicodeWordDetection: boolean;
{$ENDIF}
- function IsWordChar(AChar : REChar) : Boolean; inline;
- function IsSpaceChar(AChar : PRegExprChar) : Boolean; inline;
- function IsDigit(AChar : PRegExprChar) : Boolean; inline;
+
+ CharCheckers: TRegExprCharCheckerArray;
+ CharCheckerInfos: TRegExprCharCheckerInfos;
+ CheckerIndex_Word: byte;
+ CheckerIndex_NotWord: byte;
+ CheckerIndex_Digit: byte;
+ CheckerIndex_NotDigit: byte;
+ CheckerIndex_Space: byte;
+ CheckerIndex_NotSpace: byte;
+ CheckerIndex_HorzSep: byte;
+ CheckerIndex_NotHorzSep: byte;
+ CheckerIndex_VertSep: byte;
+ CheckerIndex_NotVertSep: byte;
+ CheckerIndex_LowerAZ: byte;
+ CheckerIndex_UpperAZ: byte;
+
+ procedure InitCharCheckers;
+ function CharChecker_Word(ch: REChar): boolean;
+ function CharChecker_NotWord(ch: REChar): boolean;
+ function CharChecker_Space(ch: REChar): boolean;
+ function CharChecker_NotSpace(ch: REChar): boolean;
+ function CharChecker_Digit(ch: REChar): boolean;
+ function CharChecker_NotDigit(ch: REChar): boolean;
+ function CharChecker_HorzSep(ch: REChar): boolean;
+ function CharChecker_NotHorzSep(ch: REChar): boolean;
+ function CharChecker_VertSep(ch: REChar): boolean;
+ function CharChecker_NotVertSep(ch: REChar): boolean;
+ function CharChecker_LowerAZ(ch: REChar): boolean;
+ function CharChecker_UpperAZ(ch: REChar): boolean;
+
+ procedure ClearMatches; {$IFDEF InlineFuncs}inline;{$ENDIF}
+ procedure ClearInternalIndexes; {$IFDEF InlineFuncs}inline;{$ENDIF}
+ function FindInCharClass(ABuffer: PRegExprChar; AChar: REChar; AIgnoreCase: boolean): boolean;
+ procedure GetCharSetFromCharClass(ABuffer: PRegExprChar; AIgnoreCase: boolean; var ARes: TRegExprCharset);
+ procedure GetCharSetFromSpaceChars(var ARes: TRegExprCharset);
+ procedure GetCharSetFromWordChars(var ARes: TRegExprCharSet);
+ function IsWordChar(AChar: REChar): boolean; {$IFDEF InlineFuncs}inline;{$ENDIF}
+ function IsSpaceChar(AChar: REChar): boolean; {$IFDEF InlineFuncs}inline;{$ENDIF}
+ function IsCustomLineSeparator(AChar: REChar): boolean; {$IFDEF InlineFuncs}inline;{$ENDIF}
+ procedure InitLineSepArray;
// Mark programm as having to be [re]compiled
procedure InvalidateProgramm;
// Check if we can use precompiled r.e. or
// [re]compile it if something changed
- function IsProgrammOk : boolean; //###0.941
-
- function GetExpression : RegExprString;
- procedure SetExpression (const s : RegExprString);
-
- function GetModifierStr : RegExprString;
- // Parse AModifiers string and return true and set AModifiersInt
- // if it's in format 'ismxrg-ismxrg'.
- class function ParseModifiersStr (const AModifiers : RegExprString;
- var AModifiersInt : integer) : boolean; //###0.941 class function now
- procedure SetModifierStr (const AModifiers : RegExprString);
-
- function GetModifier (AIndex : integer) : boolean;
- procedure SetModifier (AIndex : integer; ASet : boolean);
+ function IsProgrammOk: boolean; // ###0.941
+
+ procedure SetExpression(const AStr: RegExprString);
+
+ function GetModifierStr: RegExprString;
+ procedure SetModifierStr(const AStr: RegExprString);
+ function GetModifierG: boolean;
+ function GetModifierI: boolean;
+ function GetModifierM: boolean;
+ function GetModifierR: boolean;
+ function GetModifierS: boolean;
+ function GetModifierX: boolean;
+ procedure SetModifierG(AValue: boolean);
+ procedure SetModifierI(AValue: boolean);
+ procedure SetModifierM(AValue: boolean);
+ procedure SetModifierR(AValue: boolean);
+ procedure SetModifierS(AValue: boolean);
+ procedure SetModifierX(AValue: boolean);
// Default handler raises exception ERegExpr with
// Message = ErrorMsg (AErrorID), ErrorCode = AErrorID
// and CompilerErrorPos = value of property CompilerErrorPos.
- procedure Error (AErrorID : integer); virtual; // error handler.
-
+ procedure Error(AErrorID: integer); virtual; // error handler.
- {==================== Compiler section ===================}
+ { ==================== Compiler section =================== }
// compile a regular expression into internal code
- function CompileRegExpr (exp : PRegExprChar) : boolean;
- procedure SetUseOsLineEndOnReplace(AValue: Boolean);
+ function CompileRegExpr(ARegExp: PRegExprChar): boolean;
+ procedure SetUseOsLineEndOnReplace(AValue: boolean);
// set the next-pointer at the end of a node chain
- procedure Tail (p : PRegExprChar; val : PRegExprChar);
+ procedure Tail(p: PRegExprChar; val: PRegExprChar);
// regoptail - regtail on operand of first argument; nop if operandless
- procedure OpTail (p : PRegExprChar; val : PRegExprChar);
+ procedure OpTail(p: PRegExprChar; val: PRegExprChar);
// regnode - emit a node, return location
- function EmitNode (op : TREOp) : PRegExprChar;
+ function EmitNode(op: TREOp): PRegExprChar;
// emit (if appropriate) a byte of code
- procedure EmitC (b : REChar);
+ procedure EmitC(ch: REChar);
+
+ // emit LongInt value
+ procedure EmitInt(AValue: LongInt);
// insert an operator in front of already-emitted operand
// Means relocating the operand.
- procedure InsertOperator (op : TREOp; opnd : PRegExprChar; sz : integer); //###0.90
+ procedure InsertOperator(op: TREOp; opnd: PRegExprChar; sz: integer);
+ // ###0.90
// regular expression, i.e. main body or parenthesized thing
- function ParseReg (paren : integer; var flagp : integer) : PRegExprChar;
+ function ParseReg(paren: integer; var flagp: integer): PRegExprChar;
// one alternative of an | operator
- function ParseBranch (var flagp : integer) : PRegExprChar;
+ function ParseBranch(var flagp: integer): PRegExprChar;
// something followed by possible [*+?]
- function ParsePiece (var flagp : integer) : PRegExprChar;
+ function ParsePiece(var flagp: integer): PRegExprChar;
- function HexDig (ch : REChar) : PtrInt;
+ function HexDig(Ch: REChar): integer;
- function UnQuoteChar (var APtr : PRegExprChar) : REChar;
+ function UnQuoteChar(var APtr: PRegExprChar): REChar;
// the lowest level
- function ParseAtom (var flagp : integer) : PRegExprChar;
+ function ParseAtom(var flagp: integer): PRegExprChar;
// current pos in r.e. - for error hanling
- function GetCompilerErrorPos : PtrInt;
+ function GetCompilerErrorPos: PtrInt;
- {$IFDEF UseFirstCharSet} //###0.929
- procedure FillFirstCharSet (prog : PRegExprChar);
+ {$IFDEF UseFirstCharSet} // ###0.929
+ procedure FillFirstCharSet(prog: PRegExprChar);
{$ENDIF}
-
- {===================== Matching section ===================}
+ { ===================== Matching section =================== }
// repeatedly match something simple, report how many
- function regrepeat (p : PRegExprChar; AMax : PtrInt) : PtrInt;
+ function regrepeat(p: PRegExprChar; AMax: integer): integer;
// dig the "next" pointer out of a node
- function regnext (p : PRegExprChar) : PRegExprChar;
+ function regnext(p: PRegExprChar): PRegExprChar;
// recursively matching routine
- function MatchPrim (prog : PRegExprChar) : boolean;
+ function MatchPrim(prog: PRegExprChar): boolean;
+
+ // match at specific position only, called from ExecPrim
+ function MatchAtOnePos(APos: PRegExprChar): boolean; {$IFDEF InlineFuncs}inline;{$ENDIF}
// Exec for stored InputString
- function ExecPrim (AOffset: PtrInt) : boolean;
+ function ExecPrim(AOffset: integer; ATryOnce, ASlowChecks: boolean): boolean;
{$IFDEF RegExpPCodeDump}
- function DumpOp (op : REChar) : RegExprString;
- {$ENDIF}
-
- function GetSubExprMatchCount : integer;
- function GetMatchPos (Idx : integer) : PtrInt;
- function GetMatchLen (Idx : integer) : PtrInt;
- function GetMatch (Idx : integer) : RegExprString;
-
- function GetInputString : RegExprString;
- procedure SetInputString (const AInputString : RegExprString);
-
- {$IFNDEF UseSetOfChar}
- function StrScanCI (s : PRegExprChar; ch : REChar) : PRegExprChar; //###0.928
+ function DumpOp(op: TREOp): RegExprString;
{$ENDIF}
+ function GetSubExprCount: integer;
+ function GetMatchPos(Idx: integer): PtrInt;
+ function GetMatchLen(Idx: integer): PtrInt;
+ function GetMatch(Idx: integer): RegExprString;
- procedure SetLineSeparators (const AStr : RegExprString);
- procedure SetLinePairedSeparator (const AStr : RegExprString);
- function GetLinePairedSeparator : RegExprString;
+ procedure SetInputString(const AInputString: RegExprString);
+ procedure SetLineSeparators(const AStr: RegExprString);
+ procedure SetLinePairedSeparator(const AStr: RegExprString);
+ function GetLinePairedSeparator: RegExprString;
- public
+ public
constructor Create; overload;
- constructor Create(AExpression:string); overload;
+ constructor Create(const AExpression: RegExprString); overload;
destructor Destroy; override;
- class function VersionMajor : integer; //###0.944
- class function VersionMinor : integer; //###0.944
-
+ class function VersionMajor: integer;
+ class function VersionMinor: integer;
// match a programm against a string AInputString
// !!! Exec store AInputString into InputString property
// For Delphi 5 and higher available overloaded versions - first without
// parameter (uses already assigned to InputString property value)
- // and second that has PtrInt parameter and is same as ExecPos
- function Exec (const AInputString : RegExprString) : boolean; overload;
- function Exec : boolean; overload; //###0.949
- function Exec (AOffset: PtrInt) : boolean; overload; //###0.949
+ // and second that has int parameter and is same as ExecPos
+ function Exec(const AInputString: RegExprString): boolean; overload;
+ function Exec: boolean; overload;
+ function Exec(AOffset: integer): boolean; overload;
+
// find next match:
- // ExecNext;
+ // ExecNext;
// works the same as
- // if MatchLen [0] = 0 then ExecPos (MatchPos [0] + 1)
- // else ExecPos (MatchPos [0] + MatchLen [0]);
+ // if MatchLen [0] = 0 then ExecPos (MatchPos [0] + 1)
+ // else ExecPos (MatchPos [0] + MatchLen [0]);
// but it's more simpler !
// Raises exception if used without preceeding SUCCESSFUL call to
// Exec* (Exec, ExecPos, ExecNext). So You always must use something like
// if Exec (InputString) then repeat { proceed results} until not ExecNext;
- function ExecNext : boolean;
+ function ExecNext: boolean;
// find match for InputString starting from AOffset position
// (AOffset=1 - first char of InputString)
- function ExecPos (AOffset: PtrInt {$IFDEF DefParam}= 1{$ENDIF}) : boolean;
-
+ function ExecPos(AOffset: integer = 1): boolean; overload;
+ function ExecPos(AOffset: integer; ATryOnce: boolean): boolean; overload;
// Returns ATemplate with '$&' or '$0' replaced by whole r.e.
- // occurence and '$n' replaced by occurence of subexpression #n.
- // Since v.0.929 '$' used instead of '\' (for future extensions
- // and for more Perl-compatibility) and accept more then one digit.
- // If you want place into template raw '$' or '\', use prefix '\'
+ // occurence and '$1'...'$nn' replaced by subexpression with given index.
+ // Symbol '$' is used instead of '\' (for future extensions
+ // and for more Perl-compatibility) and accepts more than one digit.
+ // If you want to place into template raw '$' or '\', use prefix '\'.
// Example: '1\$ is $2\\rub\\' -> '1$ is <Match[2]>\rub\'
- // If you want to place raw digit after '$n' you must delimit
- // n with curly braces '{}'.
+ // If you want to place any number after '$' you must enclose it
+ // with curly braces: '${12}'.
// Example: 'a$12bc' -> 'a<Match[12]>bc'
// 'a${1}2bc' -> 'a<Match[1]>2bc'.
- function Substitute (const ATemplate : RegExprString) : RegExprString;
+ function Substitute(const ATemplate: RegExprString): RegExprString;
- // Split AInputStr into APieces by r.e. occurencies
- // Internally calls Exec[Next]
- procedure Split (Const AInputStr : RegExprString; APieces : TStrings);
+ // Splits AInputStr to list by positions of all r.e. occurencies.
+ // Internally calls Exec, ExecNext.
+ procedure Split(const AInputStr: RegExprString; APieces: TStrings);
- function Replace (Const AInputStr : RegExprString;
- const AReplaceStr : RegExprString;
- AUseSubstitution : boolean{$IFDEF DefParam}= False{$ENDIF}) //###0.946
- : RegExprString; overload;
+ function Replace(const AInputStr: RegExprString;
+ const AReplaceStr: RegExprString;
+ AUseSubstitution: boolean = False) // ###0.946
+ : RegExprString; overload;
+ function Replace(const AInputStr: RegExprString;
+ AReplaceFunc: TRegExprReplaceFunction): RegExprString; overload;
- function Replace (Const AInputStr : RegExprString;
- AReplaceFunc : TRegExprReplaceFunction)
- : RegExprString; overload;
- // Returns AInputStr with r.e. occurencies replaced by AReplaceStr
+ // Returns AInputStr with r.e. occurencies replaced by AReplaceStr.
// If AUseSubstitution is true, then AReplaceStr will be used
// as template for Substitution methods.
// For example:
- // Expression := '({-i}block|var)\s*\(\s*([^ ]*)\s*\)\s*';
- // Replace ('BLOCK( test1)', 'def "$1" value "$2"', True);
- // will return: def 'BLOCK' value 'test1'
- // Replace ('BLOCK( test1)', 'def "$1" value "$2"')
- // will return: def "$1" value "$2"
- // Internally calls Exec[Next]
- // Overloaded version and ReplaceEx operate with call-back function,
+ // Expression := '({-i}block|var)\s*\(\s*([^ ]*)\s*\)\s*';
+ // Replace ('BLOCK( test1)', 'def "$1" value "$2"', True);
+ // will return: def 'BLOCK' value 'test1'
+ // Replace ('BLOCK( test1)', 'def "$1" value "$2"')
+ // will return: def "$1" value "$2"
+ // Internally calls Exec, ExecNext.
+ // Overloaded version and ReplaceEx operate with callback function,
// so you can implement really complex functionality.
- function ReplaceEx (Const AInputStr : RegExprString;
- AReplaceFunc : TRegExprReplaceFunction):
- RegExprString;
+ function ReplaceEx(const AInputStr: RegExprString;
+ AReplaceFunc: TRegExprReplaceFunction): RegExprString;
// Returns ID of last error, 0 if no errors (unusable if
// Error method raises exception) and clear internal status
// into 0 (no errors).
- function LastError : integer;
+ function LastError: integer;
// Returns Error message for error with ID = AErrorID.
- function ErrorMsg (AErrorID : integer) : RegExprString; virtual;
-
-
+ function ErrorMsg(AErrorID: integer): RegExprString; virtual;
// Converts Ch into upper case if it in lower case or in lower
// if it in upper (uses current system local setings)
- class function InvertCaseFunction (const Ch : REChar) : REChar;
+ class function InvertCaseFunction(const Ch: REChar): REChar;
// [Re]compile r.e. Useful for example for GUI r.e. editors (to check
// all properties validity).
- procedure Compile; //###0.941
+ procedure Compile; // ###0.941
{$IFDEF RegExpPCodeDump}
// dump a compiled regexp in vaguely comprehensible form
- function Dump : RegExprString;
+ function Dump: RegExprString;
{$ENDIF}
// Regular expression.
// For optimization, TRegExpr will automatically compiles it into 'P-code'
// (You can see it with help of Dump method) and stores in internal
// structures. Real [re]compilation occures only when it really needed -
- // while calling Exec[Next], Substitute, Dump, etc
+ // while calling Exec, ExecNext, Substitute, Dump, etc
// and only if Expression or other P-code affected properties was changed
// after last [re]compilation.
// If any errors while [re]compilation occures, Error method is called
// (by default Error raises exception - see below)
- property Expression : RegExprString read GetExpression write SetExpression;
+ property Expression: RegExprString read fExpression write SetExpression;
// Set/get default values of r.e.syntax modifiers. Modifiers in
// r.e. (?ismx-ismx) will replace this default values.
// If you try to set unsupported modifier, Error will be called
// (by defaul Error raises exception ERegExpr).
- property ModifierStr : RegExprString read GetModifierStr write SetModifierStr;
-
- // Modifier /i - caseinsensitive, initialized from RegExprModifierI
- property ModifierI : boolean index 1 read GetModifier write SetModifier;
-
- // Modifier /r - use r.e.syntax extended for russian,
- // (was property ExtSyntaxEnabled in previous versions)
- // If true, then а-я additional include russian letter 'ё',
- // А-Я additional include 'Ё', and а-Я include all russian symbols.
- // You have to turn it off if it can interfere with you national alphabet.
- // , initialized from RegExprModifierR
- property ModifierR : boolean index 2 read GetModifier write SetModifier;
-
- // Modifier /s - '.' works as any char (else as [^\n]),
- // , initialized from RegExprModifierS
- property ModifierS : boolean index 3 read GetModifier write SetModifier;
-
- // Switching off modifier /g switchs all operators in
- // non-greedy style, so if ModifierG = False, then
- // all '*' works as '*?', all '+' as '+?' and so on.
- // , initialized from RegExprModifierG
- property ModifierG : boolean index 4 read GetModifier write SetModifier;
-
- // Treat string as multiple lines. That is, change `^' and `$' from
- // matching at only the very start or end of the string to the start
- // or end of any line anywhere within the string.
- // , initialized from RegExprModifierM
- property ModifierM : boolean index 5 read GetModifier write SetModifier;
-
- // Modifier /x - eXtended syntax, allow r.e. text formatting,
- // see description in the help. Initialized from RegExprModifierX
-
- property ModifierX : boolean index 6 read GetModifier write SetModifier;
+ property ModifierStr: RegExprString read GetModifierStr write SetModifierStr;
+
+ property ModifierI: boolean read GetModifierI write SetModifierI;
+ property ModifierR: boolean read GetModifierR write SetModifierR;
+ property ModifierS: boolean read GetModifierS write SetModifierS;
+ property ModifierG: boolean read GetModifierG write SetModifierG;
+ property ModifierM: boolean read GetModifierM write SetModifierM;
+ property ModifierX: boolean read GetModifierX write SetModifierX;
+
// returns current input string (from last Exec call or last assign
// to this property).
// Any assignment to this property clear Match* properties !
- property InputString : RegExprString read GetInputString write SetInputString;
+ property InputString: RegExprString read fInputString write SetInputString;
// Number of subexpressions has been found in last Exec* call.
// If there are no subexpr. but whole expr was found (Exec* returned True),
@@ -532,19 +553,19 @@ type
// Note, that some subexpr. may be not found and for such
// subexpr. MathPos=MatchLen=-1 and Match=''.
// For example: Expression := '(1)?2(3)?';
- // Exec ('123'): SubExprMatchCount=2, Match[0]='123', [1]='1', [2]='3'
- // Exec ('12'): SubExprMatchCount=1, Match[0]='12', [1]='1'
- // Exec ('23'): SubExprMatchCount=2, Match[0]='23', [1]='', [2]='3'
- // Exec ('2'): SubExprMatchCount=0, Match[0]='2'
- // Exec ('7') - return False: SubExprMatchCount=-1
- property SubExprMatchCount : integer read GetSubExprMatchCount;
+ // Exec ('123'): SubExprMatchCount=2, Match[0]='123', [1]='1', [2]='3'
+ // Exec ('12'): SubExprMatchCount=1, Match[0]='12', [1]='1'
+ // Exec ('23'): SubExprMatchCount=2, Match[0]='23', [1]='', [2]='3'
+ // Exec ('2'): SubExprMatchCount=0, Match[0]='2'
+ // Exec ('7') - return False: SubExprMatchCount=-1
+ property SubExprMatchCount: integer read GetSubExprCount;
// pos of entrance subexpr. #Idx into tested in last Exec*
// string. First subexpr. has Idx=1, last - MatchCount,
// whole r.e. has Idx=0.
// Returns -1 if in r.e. no such subexpr. or this subexpr.
// not found in input string.
- property MatchPos [Idx : integer] : PtrInt read GetMatchPos;
+ property MatchPos[Idx: integer]: PtrInt read GetMatchPos;
// len of entrance subexpr. #Idx r.e. into tested in last Exec*
// string. First subexpr. has Idx=1, last - MatchCount,
@@ -552,1122 +573,1658 @@ type
// Returns -1 if in r.e. no such subexpr. or this subexpr.
// not found in input string.
// Remember - MatchLen may be 0 (if r.e. match empty string) !
- property MatchLen [Idx : integer] : PtrInt read GetMatchLen;
+ property MatchLen[Idx: integer]: PtrInt read GetMatchLen;
// == copy (InputString, MatchPos [Idx], MatchLen [Idx])
// Returns '' if in r.e. no such subexpr. or this subexpr.
// not found in input string.
- property Match [Idx : integer] : RegExprString read GetMatch;
+ property Match[Idx: integer]: RegExprString read GetMatch;
// Returns position in r.e. where compiler stopped.
// Useful for error diagnostics
- property CompilerErrorPos : PtrInt read GetCompilerErrorPos;
+ property CompilerErrorPos: PtrInt read GetCompilerErrorPos;
// Contains chars, treated as /s (initially filled with RegExprSpaceChars
// global constant)
- property SpaceChars : RegExprString read fSpaceChars write fSpaceChars; //###0.927
+ property SpaceChars: RegExprString read fSpaceChars write fSpaceChars;
+ // ###0.927
// Contains chars, treated as /w (initially filled with RegExprWordChars
// global constant)
- property WordChars : RegExprString read fWordChars write fWordChars; //###0.929
+ property WordChars: RegExprString read fWordChars write fWordChars;
+ // ###0.929
{$IFDEF UnicodeWordDetection}
// If set to true, in addition to using WordChars, a heuristic to detect unicode word letters is used for \w
- Property UseUnicodeWordDetection : Boolean Read FUseUnicodeWordDetection Write FUseUnicodeWordDetection;
+ property UseUnicodeWordDetection: boolean read FUseUnicodeWordDetection write FUseUnicodeWordDetection;
{$ENDIF}
// line separators (like \n in Unix)
- property LineSeparators : RegExprString read fLineSeparators write SetLineSeparators; //###0.941
+ property LineSeparators: RegExprString read fLineSeparators write SetLineSeparators; // ###0.941
// paired line separator (like \r\n in DOS and Windows).
// must contain exactly two chars or no chars at all
- property LinePairedSeparator : RegExprString read GetLinePairedSeparator write SetLinePairedSeparator; //###0.941
-
+ property LinePairedSeparator: RegExprString read GetLinePairedSeparator write SetLinePairedSeparator; // ###0.941
// Set this property if you want to override case-insensitive functionality.
// Create set it to RegExprInvertCaseFunction (InvertCaseFunction by default)
- property InvertCase : TRegExprInvertCaseFunction read fInvertCase write fInvertCase; //##0.935
+ property InvertCase: TRegExprInvertCaseFunction read fInvertCase write fInvertCase; // ##0.935
// Use OS line end on replace or not. Default is True for backwards compatibility.
// Set to false to use #10.
- Property UseOsLineEndOnReplace : Boolean Read FUseOsLineEndOnReplace Write SetUseOsLineEndOnReplace;
+ property UseOsLineEndOnReplace: boolean read FUseOsLineEndOnReplace write SetUseOsLineEndOnReplace;
+
+ property SlowChecksSizeMax: integer read fSlowChecksSizeMax write fSlowChecksSizeMax;
end;
- ERegExpr = class (Exception)
- public
- ErrorCode : integer;
- CompilerErrorPos : PtrInt;
+type
+ ERegExpr = class(Exception)
+ public
+ ErrorCode: integer;
+ CompilerErrorPos: PtrInt;
end;
const
- // default for InvertCase property:
- RegExprInvertCaseFunction : TRegExprInvertCaseFunction = nil ;
+ RegExprInvertCaseFunction: TRegExprInvertCaseFunction = nil;
-// true if string AInputString match regular expression ARegExpr
-// ! will raise exeption if syntax errors in ARegExpr
-function ExecRegExpr (const ARegExpr, AInputStr : RegExprString) : boolean;
+ // true if string AInputString match regular expression ARegExpr
+ // ! will raise exeption if syntax errors in ARegExpr
+function ExecRegExpr(const ARegExpr, AInputStr: RegExprString): boolean;
// Split AInputStr into APieces by r.e. ARegExpr occurencies
-procedure SplitRegExpr (const ARegExpr, AInputStr : RegExprString; APieces : TStrings);
+procedure SplitRegExpr(const ARegExpr, AInputStr: RegExprString;
+ APieces: TStrings);
// Returns AInputStr with r.e. occurencies replaced by AReplaceStr
// If AUseSubstitution is true, then AReplaceStr will be used
// as template for Substitution methods.
// For example:
-// ReplaceRegExpr ('({-i}block|var)\s*\(\s*([^ ]*)\s*\)\s*',
-// 'BLOCK( test1)', 'def "$1" value "$2"', True)
-// will return: def 'BLOCK' value 'test1'
-// ReplaceRegExpr ('({-i}block|var)\s*\(\s*([^ ]*)\s*\)\s*',
-// 'BLOCK( test1)', 'def "$1" value "$2"')
-// will return: def "$1" value "$2"
-function ReplaceRegExpr (const ARegExpr, AInputStr, AReplaceStr : RegExprString;
- AUseSubstitution : boolean{$IFDEF DefParam}= False{$ENDIF}) : RegExprString; overload; //###0.947
+// ReplaceRegExpr ('({-i}block|var)\s*\(\s*([^ ]*)\s*\)\s*',
+// 'BLOCK( test1)', 'def "$1" value "$2"', True)
+// will return: def 'BLOCK' value 'test1'
+// ReplaceRegExpr ('({-i}block|var)\s*\(\s*([^ ]*)\s*\)\s*',
+// 'BLOCK( test1)', 'def "$1" value "$2"')
+// will return: def "$1" value "$2"
+function ReplaceRegExpr(const ARegExpr, AInputStr, AReplaceStr: RegExprString;
+ AUseSubstitution: boolean = False): RegExprString; overload; // ###0.947
// Alternate form allowing to set more parameters.
-Type
- TRegexReplaceOption = (rroModifierI,
- rroModifierR,
- rroModifierS,
- rroModifierG,
- rroModifierM,
- rroModifierX,
- rroUseSubstitution,
- rroUseOsLineEnd);
- TRegexReplaceOptions = Set of TRegexReplaceOption;
-
-function ReplaceRegExpr (const ARegExpr, AInputStr, AReplaceStr : RegExprString; Options :TRegexReplaceOptions) : RegExprString; overload;
+type
+ TRegexReplaceOption = (
+ rroModifierI,
+ rroModifierR,
+ rroModifierS,
+ rroModifierG,
+ rroModifierM,
+ rroModifierX,
+ rroUseSubstitution,
+ rroUseOsLineEnd
+ );
+ TRegexReplaceOptions = set of TRegexReplaceOption;
+
+function ReplaceRegExpr(const ARegExpr, AInputStr, AReplaceStr: RegExprString;
+ Options: TRegexReplaceOptions): RegExprString; overload;
// Replace all metachars with its safe representation,
// for example 'abc$cd.(' converts into 'abc\$cd\.\('
// This function useful for r.e. autogeneration from
// user input
-function QuoteRegExprMetaChars (const AStr : RegExprString) : RegExprString;
+function QuoteRegExprMetaChars(const AStr: RegExprString): RegExprString;
// Makes list of subexpressions found in ARegExpr r.e.
// In ASubExps every item represent subexpression,
// from first to last, in format:
-// String - subexpression text (without '()')
-// low word of Object - starting position in ARegExpr, including '('
-// if exists! (first position is 1)
-// high word of Object - length, including starting '(' and ending ')'
-// if exist!
+// String - subexpression text (without '()')
+// low word of Object - starting position in ARegExpr, including '('
+// if exists! (first position is 1)
+// high word of Object - length, including starting '(' and ending ')'
+// if exist!
// AExtendedSyntax - must be True if modifier /m will be On while
// using the r.e.
// Useful for GUI editors of r.e. etc (You can find example of using
// in TestRExp.dpr project)
// Returns
-// 0 Success. No unbalanced brackets was found;
-// -1 There are not enough closing brackets ')';
-// -(n+1) At position n was found opening '[' without //###0.942
-// corresponding closing ']';
-// n At position n was found closing bracket ')' without
-// corresponding opening '('.
+// 0 Success. No unbalanced brackets was found;
+// -1 There are not enough closing brackets ')';
+// -(n+1) At position n was found opening '[' without //###0.942
+// corresponding closing ']';
+// n At position n was found closing bracket ')' without
+// corresponding opening '('.
// If Result <> 0, then ASubExpr can contain empty items or illegal ones
-function RegExprSubExpressions (const ARegExpr : string;
- ASubExprs : TStrings; AExtendedSyntax : boolean{$IFDEF DefParam}= False{$ENDIF}) : PtrInt;
-
+function RegExprSubExpressions(const ARegExpr: string; ASubExprs: TStrings;
+ AExtendedSyntax: boolean= False): integer;
implementation
+
{$IFDEF UnicodeWordDetection}
uses
UnicodeData;
{$ENDIF}
const
- // TRegExpr.VersionMajor/Minor return values of these constants:
- TRegExprVersionMajor : integer = 0;
- TRegExprVersionMinor : integer = 952;
-
- MaskModI = 1; // modifier /i bit in fModifiers
- MaskModR = 2; // -"- /r
- MaskModS = 4; // -"- /s
- MaskModG = 8; // -"- /g
- MaskModM = 16; // -"- /m
- MaskModX = 32; // -"- /x
-
- {$IFDEF UniCode}
- XIgnoredChars = ' '#9#$d#$a;
- {$ELSE}
- XIgnoredChars = [' ', #9, #$d, #$a];
- {$ENDIF}
-
- function AlignToPtr(const p: Pointer): Pointer; inline;
- begin
- {$IFDEF FPC_REQUIRES_PROPER_ALIGNMENT}
- Result := Align(p, SizeOf(Pointer));
- {$ELSE}
- Result := p;
- {$ENDIF}
- end;
-
- function AlignToInt(const p: Pointer): Pointer; inline;
- begin
- {$IFDEF FPC_REQUIRES_PROPER_ALIGNMENT}
- Result := Align(p, SizeOf(integer));
- {$ELSE}
- Result := p;
- {$ENDIF}
- end;
-
-{=============================================================}
-{===================== Global functions ======================}
-{=============================================================}
-
-function ExecRegExpr (const ARegExpr, AInputStr : RegExprString) : boolean;
+ // TRegExpr.VersionMajor/Minor return values of these constants:
+ REVersionMajor = 0;
+ REVersionMinor = 987;
+ OpKind_End = REChar(1);
+ OpKind_MetaClass = REChar(2);
+ OpKind_Range = REChar(3);
+ OpKind_Char = REChar(4);
+
+ RegExprAllSet = [0 .. 255];
+ RegExprDigitSet = [Ord('0') .. Ord('9')];
+ RegExprLowerAzSet = [Ord('a') .. Ord('z')];
+ RegExprUpperAzSet = [Ord('A') .. Ord('Z')];
+ RegExprAllAzSet = RegExprLowerAzSet + RegExprUpperAzSet;
+ RegExprLineSeparatorsSet = [$d, $a, $b, $c] {$IFDEF UniCode} + [$85] {$ENDIF};
+ RegExprHorzSeparatorsSet = [9, $20, $A0];
+
+ MaxBracesArg = $7FFFFFFF - 1; // max value for {n,m} arguments //###0.933
+
+type
+ TRENextOff = PtrInt;
+ // internal Next "pointer" (offset to current p-code) //###0.933
+ PRENextOff = ^TRENextOff;
+ // used for extracting Next "pointers" from compiled r.e. //###0.933
+ TREBracesArg = integer; // type of {m,n} arguments
+ PREBracesArg = ^TREBracesArg;
+
+const
+ REOpSz = SizeOf(TREOp) div SizeOf(REChar);
+ // size of OP_ command in REChars
+ {$IFDEF FPC_REQUIRES_PROPER_ALIGNMENT}
+ // add space for aligning pointer
+ // -1 is the correct max size but also needed for InsertOperator that needs a multiple of pointer size
+ RENextOffSz = (2 * SizeOf(TRENextOff) div SizeOf(REChar)) - 1;
+ REBracesArgSz = (2 * SizeOf(TREBracesArg) div SizeOf(REChar));
+ // add space for aligning pointer
+ {$ELSE}
+ RENextOffSz = (SizeOf(TRENextOff) div SizeOf(REChar));
+ // size of Next pointer in REChars
+ REBracesArgSz = SizeOf(TREBracesArg) div SizeOf(REChar);
+ // size of BRACES arguments in REChars
+ {$ENDIF}
+ RENumberSz = SizeOf(LongInt) div SizeOf(REChar);
+
+function _FindCharInBuffer(SBegin, SEnd: PRegExprChar; Ch: REChar): PRegExprChar; {$IFDEF InlineFuncs}inline;{$ENDIF}
begin
- With TRegExpr.Create do
- try
- Expression := ARegExpr;
- Result := Exec (AInputStr);
- finally
- Free;
+ while SBegin < SEnd do
+ begin
+ if SBegin^ = Ch then
+ begin
+ Result := SBegin;
+ Exit;
end;
- end; { of function ExecRegExpr
---------------------------------------------------------------}
+ Inc(SBegin);
+ end;
+ Result := nil;
+end;
-procedure SplitRegExpr (const ARegExpr, AInputStr : RegExprString; APieces : TStrings);
+function IsIgnoredChar(AChar: REChar): boolean; {$IFDEF InlineFuncs}inline;{$ENDIF}
+begin
+ case AChar of
+ ' ', #9, #$d, #$a:
+ Result := True
+ else
+ Result := False;
+ end;
+end;
+
+function _IsMetaChar(AChar: REChar): boolean; {$IFDEF InlineFuncs}inline;{$ENDIF}
+begin
+ case AChar of
+ 'd', 'D',
+ 's', 'S',
+ 'w', 'W',
+ 'v', 'V',
+ 'h', 'H':
+ Result := True
+ else
+ Result := False;
+ end;
+end;
+function AlignToPtr(const p: Pointer): Pointer; {$IFDEF InlineFuncs}inline;{$ENDIF}
+begin
+ {$IFDEF FPC_REQUIRES_PROPER_ALIGNMENT}
+ Result := Align(p, SizeOf(Pointer));
+ {$ELSE}
+ Result := p;
+ {$ENDIF}
+end;
+
+function AlignToInt(const p: Pointer): Pointer; {$IFDEF InlineFuncs}inline;{$ENDIF}
+begin
+ {$IFDEF FPC_REQUIRES_PROPER_ALIGNMENT}
+ Result := Align(p, SizeOf(integer));
+ {$ELSE}
+ Result := p;
+ {$ENDIF}
+end;
+
+function _UpperCase(Ch: REChar): REChar;
+begin
+ Result := Ch;
+ if (Ch >= 'a') and (Ch <= 'z') then
+ begin
+ Dec(Result, 32);
+ Exit;
+ end;
+ if Ord(Ch) < 128 then
+ Exit;
+
+ {$IFDEF FPC}
+ {$IFDEF UniCode}
+ Result := UnicodeUpperCase(Ch)[1];
+ {$ELSE}
+ Result := AnsiUpperCase(Ch)[1];
+ {$ENDIF}
+ {$ELSE}
+ {$IFDEF UniCode}
+ {$IFDEF D2009}
+ Result := TCharacter.ToUpper(Ch);
+ {$ENDIF}
+ {$ELSE}
+ Result := AnsiUpperCase(Ch)[1];
+ {$ENDIF}
+ {$ENDIF}
+end;
+
+function _LowerCase(Ch: REChar): REChar;
+begin
+ Result := Ch;
+ if (Ch >= 'A') and (Ch <= 'Z') then
+ begin
+ Inc(Result, 32);
+ Exit;
+ end;
+ if Ord(Ch) < 128 then
+ Exit;
+
+ {$IFDEF FPC}
+ {$IFDEF UniCode}
+ Result := UnicodeLowerCase(Ch)[1];
+ {$ELSE}
+ Result := AnsiLowerCase(Ch)[1];
+ {$ENDIF}
+ {$ELSE}
+ {$IFDEF UniCode}
+ {$IFDEF D2009}
+ Result := TCharacter.ToLower(Ch);
+ {$ENDIF}
+ {$ELSE}
+ Result := AnsiLowerCase(Ch)[1];
+ {$ENDIF}
+ {$ENDIF}
+end;
+
+{ ============================================================= }
+{ ===================== Global functions ====================== }
+{ ============================================================= }
+
+function IsModifiersEqual(const A, B: TRegExprModifiers): boolean;
+begin
+ Result :=
+ (A.I = B.I) and
+ (A.G = B.G) and
+ (A.M = B.M) and
+ (A.S = B.S) and
+ (A.R = B.R) and
+ (A.X = B.X);
+end;
+
+function ParseModifiers(const APtr: PRegExprChar;
+ ALen: integer;
+ var AValue: TRegExprModifiers): boolean;
+// Parse string and set AValue if it's in format 'ismxrg-ismxrg'
+var
+ IsOn: boolean;
+ i: integer;
+begin
+ Result := True;
+ IsOn := True;
+ for i := 0 to ALen-1 do
+ case APtr[i] of
+ '-':
+ IsOn := False;
+ 'I', 'i':
+ AValue.I := IsOn;
+ 'R', 'r':
+ AValue.R := IsOn;
+ 'S', 's':
+ AValue.S := IsOn;
+ 'G', 'g':
+ AValue.G := IsOn;
+ 'M', 'm':
+ AValue.M := IsOn;
+ 'X', 'x':
+ AValue.X := IsOn;
+ else
+ begin
+ Result := False;
+ Exit;
+ end;
+ end;
+end;
+
+function ExecRegExpr(const ARegExpr, AInputStr: RegExprString): boolean;
+var
+ r: TRegExpr;
+begin
+ r := TRegExpr.Create;
+ try
+ r.Expression := ARegExpr;
+ Result := r.Exec(AInputStr);
+ finally
+ r.Free;
+ end;
+end; { of function ExecRegExpr
+ -------------------------------------------------------------- }
+
+procedure SplitRegExpr(const ARegExpr, AInputStr: RegExprString;
+ APieces: TStrings);
+var
+ r: TRegExpr;
begin
APieces.Clear;
- With TRegExpr.Create do
- try
- Expression := ARegExpr;
- Split (AInputStr, APieces);
- finally
- Free;
- end;
+ r := TRegExpr.Create;
+ try
+ r.Expression := ARegExpr;
+ r.Split(AInputStr, APieces);
+ finally
+ r.Free;
+ end;
end; { of procedure SplitRegExpr
---------------------------------------------------------------}
+ -------------------------------------------------------------- }
-function ReplaceRegExpr (const ARegExpr, AInputStr, AReplaceStr : RegExprString;
- AUseSubstitution : boolean{$IFDEF DefParam}= False{$ENDIF}) : RegExprString; overload;
+function ReplaceRegExpr(const ARegExpr, AInputStr, AReplaceStr: RegExprString;
+ AUseSubstitution: boolean= False): RegExprString;
begin
with TRegExpr.Create do
try
Expression := ARegExpr;
- Result := Replace (AInputStr, AReplaceStr, AUseSubstitution);
+ Result := Replace(AInputStr, AReplaceStr, AUseSubstitution);
finally
Free;
end;
end; { of function ReplaceRegExpr
---------------------------------------------------------------}
+ -------------------------------------------------------------- }
-function ReplaceRegExpr (const ARegExpr, AInputStr, AReplaceStr : RegExprString; Options :TRegexReplaceOptions) : RegExprString; overload;
+
+function ReplaceRegExpr(const ARegExpr, AInputStr, AReplaceStr: RegExprString;
+ Options: TRegexReplaceOptions): RegExprString; overload;
begin
- with TRegExpr.Create do
- try
- ModifierI:=(rroModifierI in Options);
- ModifierR:=(rroModifierR in Options);
- ModifierS:=(rroModifierS in Options);
- ModifierG:=(rroModifierG in Options);
- ModifierM:=(rroModifierM in Options);
- ModifierX:=(rroModifierX in Options);
- // Set this after the above, if the regex contains modifiers, they will be applied.
- Expression := ARegExpr;
- UseOsLineEndOnReplace:=(rroUseOsLineEnd in Options);
- Result := Replace (AInputStr, AReplaceStr, rroUseSubstitution in options);
- finally
- Free;
- end;
+ with TRegExpr.Create do
+ try
+ ModifierI := (rroModifierI in Options);
+ ModifierR := (rroModifierR in Options);
+ ModifierS := (rroModifierS in Options);
+ ModifierG := (rroModifierG in Options);
+ ModifierM := (rroModifierM in Options);
+ ModifierX := (rroModifierX in Options);
+ // Set this after the above, if the regex contains modifiers, they will be applied.
+ Expression := ARegExpr;
+ UseOsLineEndOnReplace := (rroUseOsLineEnd in Options);
+ Result := Replace(AInputStr, AReplaceStr, rroUseSubstitution in Options);
+ finally
+ Free;
+ end;
end;
-function QuoteRegExprMetaChars (const AStr : RegExprString) : RegExprString;
- const
- RegExprMetaSet : RegExprString = '^$.[()|?+*'+EscChar+'{'
- + ']}'; // - this last are additional to META.
- // Very similar to META array, but slighly changed.
- // !Any changes in META array must be synchronized with this set.
- var
- i, i0, Len : PtrInt;
+(*
+const
+ MetaChars_Init = '^$.[()|?+*' + EscChar + '{';
+ MetaChars = MetaChars_Init; // not needed to be a variable, const is faster
+ MetaAll = MetaChars_Init + ']}'; // Very similar to MetaChars, but slighly changed.
+*)
+
+function _IsMetaSymbol1(ch: REChar): boolean; {$IFDEF InlineFuncs}inline;{$ENDIF}
+begin
+ case ch of
+ '^', '$', '.', '[', '(', ')', '|', '?', '+', '*', EscChar, '{':
+ Result := True
+ else
+ Result := False
+ end;
+end;
+
+function _IsMetaSymbol2(ch: REChar): boolean; {$IFDEF InlineFuncs}inline;{$ENDIF}
+begin
+ case ch of
+ '^', '$', '.', '[', '(', ')', '|', '?', '+', '*', EscChar, '{',
+ ']', '}':
+ Result := True
+ else
+ Result := False
+ end;
+end;
+
+function QuoteRegExprMetaChars(const AStr: RegExprString): RegExprString;
+var
+ i, i0, Len: integer;
+ ch: REChar;
begin
Result := '';
- Len := length (AStr);
+ Len := Length(AStr);
i := 1;
i0 := i;
- while i <= Len do begin
- if Pos (AStr [i], RegExprMetaSet) > 0 then begin
- Result := Result + System.Copy (AStr, i0, i - i0)
- + EscChar + AStr [i];
+ while i <= Len do
+ begin
+ ch := AStr[i];
+ if _IsMetaSymbol2(ch) then
+ begin
+ Result := Result + System.Copy(AStr, i0, i - i0) + EscChar + ch;
i0 := i + 1;
- end;
- inc (i);
- end;
- Result := Result + System.Copy (AStr, i0, MaxInt); // Tail
+ end;
+ Inc(i);
+ end;
+ Result := Result + System.Copy(AStr, i0, MaxInt); // Tail
end; { of function QuoteRegExprMetaChars
---------------------------------------------------------------}
+ -------------------------------------------------------------- }
+
+function RegExprSubExpressions(const ARegExpr: string; ASubExprs: TStrings;
+ AExtendedSyntax: boolean = False): integer;
+type
+ TStackItemRec = record // ###0.945
+ SubExprIdx: integer;
+ StartPos: PtrInt;
+ end;
-function RegExprSubExpressions (const ARegExpr : string;
- ASubExprs : TStrings; AExtendedSyntax : boolean{$IFDEF DefParam}= False{$ENDIF}) : PtrInt;
- type
- TStackItemRec = record //###0.945
- SubExprIdx : integer;
- StartPos : PtrInt;
- end;
TStackArray = packed array [0 .. NSUBEXPMAX - 1] of TStackItemRec;
- var
- Len, SubExprLen : PtrInt;
- i, i0 : PtrInt;
- Modif : integer;
- Stack : ^TStackArray; //###0.945
- StackIdx, StackSz : PtrInt;
- begin
+var
+ Len, SubExprLen: integer;
+ i, i0: integer;
+ Modif: TRegExprModifiers;
+ Stack: ^TStackArray; // ###0.945
+ StackIdx, StackSz: integer;
+begin
Result := 0; // no unbalanced brackets found at this very moment
- Modif:=0;
+ Modif:=Default(TRegExprModifiers);
ASubExprs.Clear; // I don't think that adding to non empty list
// can be useful, so I simplified algorithm to work only with empty list
- Len := length (ARegExpr); // some optimization tricks
+ Len := Length(ARegExpr); // some optimization tricks
// first we have to calculate number of subexpression to reserve
// space in Stack array (may be we'll reserve more than needed, but
// it's faster then memory reallocation during parsing)
StackSz := 1; // add 1 for entire r.e.
for i := 1 to Len do
- if ARegExpr [i] = '('
- then inc (StackSz);
-// SetLength (Stack, StackSz); //###0.945
- GetMem (Stack, SizeOf (TStackItemRec) * StackSz);
- try
+ if ARegExpr[i] = '(' then
+ Inc(StackSz);
+ // SetLength (Stack, StackSz); //###0.945
+ GetMem(Stack, SizeOf(TStackItemRec) * StackSz);
- StackIdx := 0;
- i := 1;
- while (i <= Len) do begin
- case ARegExpr [i] of
- '(': begin
- if (i < Len) and (ARegExpr [i + 1] = '?') then begin
- // this is not subexpression, but comment or other
- // Perl extension. We must check is it (?ismxrg-ismxrg)
- // and change AExtendedSyntax if /x is changed.
- inc (i, 2); // skip '(?'
- i0 := i;
- while (i <= Len) and (ARegExpr [i] <> ')')
- do inc (i);
- if i > Len
- then Result := -1 // unbalansed '('
+ try
+ StackIdx := 0;
+ i := 1;
+ while (i <= Len) do
+ begin
+ case ARegExpr[i] of
+ '(':
+ begin
+ if (i < Len) and (ARegExpr[i + 1] = '?') then
+ begin
+ // this is not subexpression, but comment or other
+ // Perl extension. We must check is it (?ismxrg-ismxrg)
+ // and change AExtendedSyntax if /x is changed.
+ Inc(i, 2); // skip '(?'
+ i0 := i;
+ while (i <= Len) and (ARegExpr[i] <> ')') do
+ Inc(i);
+ if i > Len then
+ Result := -1 // unbalansed '('
+ else
+ if ParseModifiers(@ARegExpr[i0], i - i0, Modif) then
+ // Alexey-T: original code had copy from i, not from i0
+ AExtendedSyntax := Modif.X;
+ end
else
- if TRegExpr.ParseModifiersStr (System.Copy (ARegExpr, i, i - i0), Modif)
- then AExtendedSyntax := (Modif and MaskModX) <> 0;
- end
- else begin // subexpression starts
- ASubExprs.Add (''); // just reserve space
- with Stack [StackIdx] do begin
- SubExprIdx := ASubExprs.Count - 1;
- StartPos := i;
+ begin // subexpression starts
+ ASubExprs.Add(''); // just reserve space
+ with Stack[StackIdx] do
+ begin
+ SubExprIdx := ASubExprs.Count - 1;
+ StartPos := i;
+ end;
+ Inc(StackIdx);
end;
- inc (StackIdx);
end;
- end;
- ')': begin
- if StackIdx = 0
- then Result := i // unbalanced ')'
- else begin
- dec (StackIdx);
- with Stack [StackIdx] do begin
- SubExprLen := i - StartPos + 1;
- ASubExprs.Objects [SubExprIdx] :=
- TObject (StartPos or (SubExprLen ShL 16));
- ASubExprs [SubExprIdx] := System.Copy (
- ARegExpr, StartPos + 1, SubExprLen - 2); // add without brackets
+ ')':
+ begin
+ if StackIdx = 0 then
+ Result := i // unbalanced ')'
+ else
+ begin
+ Dec(StackIdx);
+ with Stack[StackIdx] do
+ begin
+ SubExprLen := i - StartPos + 1;
+ ASubExprs.Objects[SubExprIdx] :=
+ TObject(StartPos or (SubExprLen ShL 16));
+ ASubExprs[SubExprIdx] := System.Copy(ARegExpr, StartPos + 1,
+ SubExprLen - 2); // add without brackets
+ end;
end;
end;
- end;
- EscChar: inc (i); // skip quoted symbol
- '[': begin
- // we have to skip character ranges at once, because they can
- // contain '#', and '#' in it must NOT be recognized as eXtended
- // comment beginning!
- i0 := i;
- inc (i);
- if ARegExpr [i] = ']' // cannot be 'emty' ranges - this interpretes
- then inc (i); // as ']' by itself
- while (i <= Len) and (ARegExpr [i] <> ']') do
- if ARegExpr [i] = EscChar //###0.942
- then inc (i, 2) // skip 'escaped' char to prevent stopping at '\]'
- else inc (i);
- if (i > Len) or (ARegExpr [i] <> ']') //###0.942
- then Result := - (i0 + 1); // unbalansed '[' //###0.942
- end;
- '#': if AExtendedSyntax then begin
- // skip eXtended comments
- while (i <= Len) and (ARegExpr [i] <> #$d) and (ARegExpr [i] <> #$a)
- // do not use [#$d, #$a] due to UniCode compatibility
- do inc (i);
- while (i + 1 <= Len) and ((ARegExpr [i + 1] = #$d) or (ARegExpr [i + 1] = #$a))
- do inc (i); // attempt to work with different kinds of line separators
- // now we are at the line separator that must be skipped.
- end;
- // here is no 'else' clause - we simply skip ordinary chars
- end; // of case
- inc (i); // skip scanned char
- // ! can move after Len due to skipping quoted symbol
- end;
-
- // check brackets balance
- if StackIdx <> 0
- then Result := -1; // unbalansed '('
-
- // check if entire r.e. added
- if (ASubExprs.Count = 0)
- or ((PtrInt (ASubExprs.Objects [0]) and $FFFF) <> 1)
- or (((PtrInt (ASubExprs.Objects [0]) ShR 16) and $FFFF) <> Len)
+ EscChar:
+ Inc(i); // skip quoted symbol
+ '[':
+ begin
+ // we have to skip character ranges at once, because they can
+ // contain '#', and '#' in it must NOT be recognized as eXtended
+ // comment beginning!
+ i0 := i;
+ Inc(i);
+ if ARegExpr[i] = ']' // first ']' inside [] treated as simple char, no need to check '['
+ then
+ Inc(i);
+ while (i <= Len) and (ARegExpr[i] <> ']') do
+ if ARegExpr[i] = EscChar // ###0.942
+ then
+ Inc(i, 2) // skip 'escaped' char to prevent stopping at '\]'
+ else
+ Inc(i);
+ if (i > Len) or (ARegExpr[i] <> ']') // ###0.942
+ then
+ Result := -(i0 + 1); // unbalansed '[' //###0.942
+ end;
+ '#':
+ if AExtendedSyntax then
+ begin
+ // skip eXtended comments
+ while (i <= Len) and (ARegExpr[i] <> #$d) and (ARegExpr[i] <> #$a)
+ // do not use [#$d, #$a] due to UniCode compatibility
+ do
+ Inc(i);
+ while (i + 1 <= Len) and
+ ((ARegExpr[i + 1] = #$d) or (ARegExpr[i + 1] = #$a)) do
+ Inc(i); // attempt to work with different kinds of line separators
+ // now we are at the line separator that must be skipped.
+ end;
+ // here is no 'else' clause - we simply skip ordinary chars
+ end; // of case
+ Inc(i); // skip scanned char
+ // ! can move after Len due to skipping quoted symbol
+ end;
+
+ // check brackets balance
+ if StackIdx <> 0 then
+ Result := -1; // unbalansed '('
+
+ // check if entire r.e. added
+ if (ASubExprs.Count = 0) or ((PtrInt(ASubExprs.Objects[0]) and $FFFF) <> 1)
+ or (((PtrInt(ASubExprs.Objects[0]) ShR 16) and $FFFF) <> Len)
// whole r.e. wasn't added because it isn't bracketed
// well, we add it now:
- then ASubExprs.InsertObject (0, ARegExpr, TObject ((Len ShL 16) or 1));
+ then
+ ASubExprs.InsertObject(0, ARegExpr, TObject((Len ShL 16) or 1));
- finally FreeMem (Stack);
+ finally
+ FreeMem(Stack);
end;
- end; { of function RegExprSubExpressions
---------------------------------------------------------------}
-
-
+end; { of function RegExprSubExpressions
+ -------------------------------------------------------------- }
const
- MAGIC = TREOp (216);// programm signature
-
-// name opcode opnd? meaning
- EEND = TREOp (0); // - End of program
- BOL = TREOp (1); // - Match "" at beginning of line
- EOL = TREOp (2); // - Match "" at end of line
- ANY = TREOp (3); // - Match any one character
- ANYOF = TREOp (4); // Str Match any character in string Str
- ANYBUT = TREOp (5); // Str Match any char. not in string Str
- BRANCH = TREOp (6); // Node Match this alternative, or the next
- BACK = TREOp (7); // - Jump backward (Next < 0)
- EXACTLY = TREOp (8); // Str Match string Str
- NOTHING = TREOp (9); // - Match empty string
- STAR = TREOp (10); // Node Match this (simple) thing 0 or more times
- PLUS = TREOp (11); // Node Match this (simple) thing 1 or more times
- ANYDIGIT = TREOp (12); // - Match any digit (equiv [0-9])
- NOTDIGIT = TREOp (13); // - Match not digit (equiv [0-9])
- ANYLETTER = TREOp (14); // - Match any letter from property WordChars
- NOTLETTER = TREOp (15); // - Match not letter from property WordChars
- ANYSPACE = TREOp (16); // - Match any space char (see property SpaceChars)
- NOTSPACE = TREOp (17); // - Match not space char (see property SpaceChars)
- BRACES = TREOp (18); // Node,Min,Max Match this (simple) thing from Min to Max times.
- // Min and Max are TREBracesArg
- COMMENT = TREOp (19); // - Comment ;)
- EXACTLYCI = TREOp (20); // Str Match string Str case insensitive
- ANYOFCI = TREOp (21); // Str Match any character in string Str, case insensitive
- ANYBUTCI = TREOp (22); // Str Match any char. not in string Str, case insensitive
- LOOPENTRY = TREOp (23); // Node Start of loop (Node - LOOP for this loop)
- LOOP = TREOp (24); // Node,Min,Max,LoopEntryJmp - back jump for LOOPENTRY.
- // Min and Max are TREBracesArg
- // Node - next node in sequence,
- // LoopEntryJmp - associated LOOPENTRY node addr
- ANYOFTINYSET= TREOp (25); // Chrs Match any one char from Chrs (exactly TinySetLen chars)
- ANYBUTTINYSET=TREOp (26); // Chrs Match any one char not in Chrs (exactly TinySetLen chars)
- ANYOFFULLSET= TREOp (27); // Set Match any one char from set of char
- // - very fast (one CPU instruction !) but takes 32 bytes of p-code
- BSUBEXP = TREOp (28); // Idx Match previously matched subexpression #Idx (stored as REChar) //###0.936
- BSUBEXPCI = TREOp (29); // Idx -"- in case-insensitive mode
-
- // Non-Greedy Style Ops //###0.940
- STARNG = TREOp (30); // Same as START but in non-greedy mode
- PLUSNG = TREOp (31); // Same as PLUS but in non-greedy mode
- BRACESNG = TREOp (32); // Same as BRACES but in non-greedy mode
- LOOPNG = TREOp (33); // Same as LOOP but in non-greedy mode
-
- // Multiline mode \m
- BOLML = TREOp (34); // - Match "" at beginning of line
- EOLML = TREOp (35); // - Match "" at end of line
- ANYML = TREOp (36); // - Match any one character
-
- // Word boundary
- BOUND = TREOp (37); // Match "" between words //###0.943
- NOTBOUND = TREOp (38); // Match "" not between words //###0.943
-
- // !!! Change OPEN value if you add new opcodes !!!
-
- OPEN = TREOp (39); // - Mark this point in input as start of \n
- // OPEN + 1 is \1, etc.
- CLOSE = TREOp (ord (OPEN) + NSUBEXP);
- // - Analogous to OPEN.
-
- // !!! Don't add new OpCodes after CLOSE !!!
-
-// We work with p-code through pointers, compatible with PRegExprChar.
-// Note: all code components (TRENextOff, TREOp, TREBracesArg, etc)
-// must have lengths that can be divided by SizeOf (REChar) !
-// A node is TREOp of opcode followed Next "pointer" of TRENextOff type.
-// The Next is a offset from the opcode of the node containing it.
-// An operand, if any, simply follows the node. (Note that much of
-// the code generation knows about this implicit relationship!)
-// Using TRENextOff=PtrInt speed up p-code processing.
-
-// Opcodes description:
-//
-// BRANCH The set of branches constituting a single choice are hooked
-// together with their "next" pointers, since precedence prevents
-// anything being concatenated to any individual branch. The
-// "next" pointer of the last BRANCH in a choice points to the
-// thing following the whole choice. This is also where the
-// final "next" pointer of each individual branch points; each
-// branch starts with the operand node of a BRANCH node.
-// BACK Normal "next" pointers all implicitly point forward; BACK
-// exists to make loop structures possible.
-// STAR,PLUS,BRACES '?', and complex '*' and '+', are implemented as
-// circular BRANCH structures using BACK. Complex '{min,max}'
-// - as pair LOOPENTRY-LOOP (see below). Simple cases (one
-// character per match) are implemented with STAR, PLUS and
-// BRACES for speed and to minimize recursive plunges.
-// LOOPENTRY,LOOP {min,max} are implemented as special pair
-// LOOPENTRY-LOOP. Each LOOPENTRY initialize loopstack for
-// current level.
-// OPEN,CLOSE are numbered at compile time.
-
-
-{=============================================================}
-{================== Error handling section ===================}
-{=============================================================}
+ OP_MAGIC = TREOp(216); // programm signature
+
+ // name opcode opnd? meaning
+ OP_EEND = TREOp(0); // - End of program
+ OP_BOL = TREOp(1); // - Match "" at beginning of line
+ OP_EOL = TREOp(2); // - Match "" at end of line
+ OP_ANY = TREOp(3); // - Match any one character
+ OP_ANYOF = TREOp(4); // Str Match any character in string Str
+ OP_ANYBUT = TREOp(5); // Str Match any char. not in string Str
+ OP_BRANCH = TREOp(6); // Node Match this alternative, or the next
+ OP_BACK = TREOp(7); // - Jump backward (Next < 0)
+ OP_EXACTLY = TREOp(8); // Str Match string Str
+ OP_NOTHING = TREOp(9); // - Match empty string
+ OP_STAR = TREOp(10); // Node Match this (simple) thing 0 or more times
+ OP_PLUS = TREOp(11); // Node Match this (simple) thing 1 or more times
+ OP_ANYDIGIT = TREOp(12); // - Match any digit (equiv [0-9])
+ OP_NOTDIGIT = TREOp(13); // - Match not digit (equiv [0-9])
+ OP_ANYLETTER = TREOp(14); // - Match any letter from property WordChars
+ OP_NOTLETTER = TREOp(15); // - Match not letter from property WordChars
+ OP_ANYSPACE = TREOp(16); // - Match any space char (see property SpaceChars)
+ OP_NOTSPACE = TREOp(17); // - Match not space char (see property SpaceChars)
+ OP_BRACES = TREOp(18);
+ // Node,Min,Max Match this (simple) thing from Min to Max times.
+ // Min and Max are TREBracesArg
+ OP_COMMENT = TREOp(19); // - Comment ;)
+ OP_EXACTLYCI = TREOp(20); // Str Match string Str case insensitive
+ OP_ANYOFCI = TREOp(21);
+ // Str Match any character in string Str, case insensitive
+ OP_ANYBUTCI = TREOp(22);
+ // Str Match any char. not in string Str, case insensitive
+ OP_LOOPENTRY = TREOp(23); // Node Start of loop (Node - LOOP for this loop)
+ OP_LOOP = TREOp(24); // Node,Min,Max,LoopEntryJmp - back jump for LOOPENTRY.
+ // Min and Max are TREBracesArg
+ // Node - next node in sequence,
+ // LoopEntryJmp - associated LOOPENTRY node addr
+ OP_BSUBEXP = TREOp(28);
+ // Idx Match previously matched subexpression #Idx (stored as REChar) //###0.936
+ OP_BSUBEXPCI = TREOp(29); // Idx -"- in case-insensitive mode
+
+ // Non-Greedy Style Ops //###0.940
+ OP_STARNG = TREOp(30); // Same as OP_START but in non-greedy mode
+ OP_PLUSNG = TREOp(31); // Same as OP_PLUS but in non-greedy mode
+ OP_BRACESNG = TREOp(32); // Same as OP_BRACES but in non-greedy mode
+ OP_LOOPNG = TREOp(33); // Same as OP_LOOP but in non-greedy mode
+
+ // Multiline mode \m
+ OP_BOLML = TREOp(34); // - Match "" at beginning of line
+ OP_EOLML = TREOp(35); // - Match "" at end of line
+ OP_ANYML = TREOp(36); // - Match any one character
+
+ // Word boundary
+ OP_BOUND = TREOp(37); // Match "" between words //###0.943
+ OP_NOTBOUND = TREOp(38); // Match "" not between words //###0.943
+
+ OP_ANYHORZSEP = TREOp(39); // Any horizontal whitespace \h
+ OP_NOTHORZSEP = TREOp(40); // Not horizontal whitespace \H
+ OP_ANYVERTSEP = TREOp(41); // Any vertical whitespace \v
+ OP_NOTVERTSEP = TREOp(42); // Not vertical whitespace \V
+
+ // !!! Change OP_OPEN value if you add new opcodes !!!
+
+ OP_OPEN = TREOp(43); // - Mark this point in input as start of \n
+ // OP_OPEN + 1 is \1, etc.
+ OP_CLOSE = TREOp(Ord(OP_OPEN) + NSUBEXP);
+ // - Analogous to OP_OPEN.
+
+ // !!! Don't add new OpCodes after CLOSE !!!
+
+ // We work with p-code through pointers, compatible with PRegExprChar.
+ // Note: all code components (TRENextOff, TREOp, TREBracesArg, etc)
+ // must have lengths that can be divided by SizeOf (REChar) !
+ // A node is TREOp of opcode followed Next "pointer" of TRENextOff type.
+ // The Next is a offset from the opcode of the node containing it.
+ // An operand, if any, simply follows the node. (Note that much of
+ // the code generation knows about this implicit relationship!)
+ // Using TRENextOff=PtrInt speed up p-code processing.
+
+ // Opcodes description:
+ //
+ // BRANCH The set of branches constituting a single choice are hooked
+ // together with their "next" pointers, since precedence prevents
+ // anything being concatenated to any individual branch. The
+ // "next" pointer of the last BRANCH in a choice points to the
+ // thing following the whole choice. This is also where the
+ // final "next" pointer of each individual branch points; each
+ // branch starts with the operand node of a BRANCH node.
+ // BACK Normal "next" pointers all implicitly point forward; BACK
+ // exists to make loop structures possible.
+ // STAR,PLUS,BRACES '?', and complex '*' and '+', are implemented as
+ // circular BRANCH structures using BACK. Complex '{min,max}'
+ // - as pair LOOPENTRY-LOOP (see below). Simple cases (one
+ // character per match) are implemented with STAR, PLUS and
+ // BRACES for speed and to minimize recursive plunges.
+ // LOOPENTRY,LOOP {min,max} are implemented as special pair
+ // LOOPENTRY-LOOP. Each LOOPENTRY initialize loopstack for
+ // current level.
+ // OPEN,CLOSE are numbered at compile time.
+
+ { ============================================================= }
+ { ================== Error handling section =================== }
+ { ============================================================= }
const
- reeOk = 0;
- reeCompNullArgument = 100;
- reeCompRegexpTooBig = 101;
- reeCompParseRegTooManyBrackets = 102;
- reeCompParseRegUnmatchedBrackets = 103;
- reeCompParseRegUnmatchedBrackets2 = 104;
- reeCompParseRegJunkOnEnd = 105;
- reePlusStarOperandCouldBeEmpty = 106;
- reeNestedSQP = 107;
- reeBadHexDigit = 108;
- reeInvalidRange = 109;
- reeParseAtomTrailingBackSlash = 110;
- reeNoHexCodeAfterBSlashX = 111;
- reeHexCodeAfterBSlashXTooBig = 112;
- reeUnmatchedSqBrackets = 113;
- reeInternalUrp = 114;
- reeQPSBFollowsNothing = 115;
- reeTrailingBackSlash = 116;
- reeRarseAtomInternalDisaster = 119;
- reeBRACESArgTooBig = 122;
- reeBracesMinParamGreaterMax = 124;
- reeUnclosedComment = 125;
- reeComplexBracesNotImplemented = 126;
- reeUrecognizedModifier = 127;
- reeBadLinePairedSeparator = 128;
- reeRegRepeatCalledInappropriately = 1000;
- reeMatchPrimMemoryCorruption = 1001;
- reeMatchPrimCorruptedPointers = 1002;
- reeNoExpression = 1003;
- reeCorruptedProgram = 1004;
- reeNoInputStringSpecified = 1005;
- reeOffsetMustBeGreaterThen0 = 1006;
- reeExecNextWithoutExec = 1007;
- reeGetInputStringWithoutInputString = 1008;
- reeDumpCorruptedOpcode = 1011;
- reeModifierUnsupported = 1013;
- reeLoopStackExceeded = 1014;
- reeLoopWithoutEntry = 1015;
- reeBadPCodeImported = 2000;
-
-function TRegExpr.ErrorMsg (AErrorID : integer) : RegExprString;
- begin
+ reeOk = 0;
+ reeCompNullArgument = 100;
+ reeCompParseRegTooManyBrackets = 102;
+ reeCompParseRegUnmatchedBrackets = 103;
+ reeCompParseRegUnmatchedBrackets2 = 104;
+ reeCompParseRegJunkOnEnd = 105;
+ reePlusStarOperandCouldBeEmpty = 106;
+ reeNestedSQP = 107;
+ reeBadHexDigit = 108;
+ reeInvalidRange = 109;
+ reeParseAtomTrailingBackSlash = 110;
+ reeNoHexCodeAfterBSlashX = 111;
+ reeHexCodeAfterBSlashXTooBig = 112;
+ reeUnmatchedSqBrackets = 113;
+ reeInternalUrp = 114;
+ reeQPSBFollowsNothing = 115;
+ reeTrailingBackSlash = 116;
+ reeNoLetterAfterBSlashC = 117;
+ reeMetaCharAfterMinusInRange = 118;
+ reeRarseAtomInternalDisaster = 119;
+ reeIncorrectBraces = 121;
+ reeBRACESArgTooBig = 122;
+ reeUnknownOpcodeInFillFirst = 123;
+ reeBracesMinParamGreaterMax = 124;
+ reeUnclosedComment = 125;
+ reeComplexBracesNotImplemented = 126;
+ reeUnrecognizedModifier = 127;
+ reeBadLinePairedSeparator = 128;
+ // Runtime errors must be >= 1000
+ reeRegRepeatCalledInappropriately = 1000;
+ reeMatchPrimMemoryCorruption = 1001;
+ reeMatchPrimCorruptedPointers = 1002;
+ reeNoExpression = 1003;
+ reeCorruptedProgram = 1004;
+ reeNoInputStringSpecified = 1005;
+ reeOffsetMustBePositive = 1006;
+ reeExecNextWithoutExec = 1007;
+ reeBadOpcodeInCharClass = 1008;
+ reeDumpCorruptedOpcode = 1011;
+ reeModifierUnsupported = 1013;
+ reeLoopStackExceeded = 1014;
+ reeLoopWithoutEntry = 1015;
+
+function TRegExpr.ErrorMsg(AErrorID: integer): RegExprString;
+begin
case AErrorID of
- reeOk: Result := 'No errors';
- reeCompNullArgument: Result := 'TRegExpr(comp): Null Argument';
- reeCompRegexpTooBig: Result := 'TRegExpr(comp): Regexp Too Big';
- reeCompParseRegTooManyBrackets: Result := 'TRegExpr(comp): ParseReg Too Many ()';
- reeCompParseRegUnmatchedBrackets: Result := 'TRegExpr(comp): ParseReg Unmatched ()';
- reeCompParseRegUnmatchedBrackets2: Result := 'TRegExpr(comp): ParseReg Unmatched ()';
- reeCompParseRegJunkOnEnd: Result := 'TRegExpr(comp): ParseReg Junk On End';
- reePlusStarOperandCouldBeEmpty: Result := 'TRegExpr(comp): *+ Operand Could Be Empty';
- reeNestedSQP: Result := 'TRegExpr(comp): Nested *?+';
- reeBadHexDigit: Result := 'TRegExpr(comp): Bad Hex Digit';
- reeInvalidRange: Result := 'TRegExpr(comp): Invalid [] Range';
- reeParseAtomTrailingBackSlash: Result := 'TRegExpr(comp): Parse Atom Trailing \';
- reeNoHexCodeAfterBSlashX: Result := 'TRegExpr(comp): No Hex Code After \x';
- reeHexCodeAfterBSlashXTooBig: Result := 'TRegExpr(comp): Hex Code After \x Is Too Big';
- reeUnmatchedSqBrackets: Result := 'TRegExpr(comp): Unmatched []';
- reeInternalUrp: Result := 'TRegExpr(comp): Internal Urp';
- reeQPSBFollowsNothing: Result := 'TRegExpr(comp): ?+*{ Follows Nothing';
- reeTrailingBackSlash: Result := 'TRegExpr(comp): Trailing \';
- reeRarseAtomInternalDisaster: Result := 'TRegExpr(comp): RarseAtom Internal Disaster';
- reeBRACESArgTooBig: Result := 'TRegExpr(comp): BRACES Argument Too Big';
- reeBracesMinParamGreaterMax: Result := 'TRegExpr(comp): BRACE Min Param Greater then Max';
- reeUnclosedComment: Result := 'TRegExpr(comp): Unclosed (?#Comment)';
- reeComplexBracesNotImplemented: Result := 'TRegExpr(comp): If you want take part in beta-testing BRACES ''{min,max}'' and non-greedy ops ''*?'', ''+?'', ''??'' for complex cases - remove ''.'' from {.$DEFINE ComplexBraces}';
- reeUrecognizedModifier: Result := 'TRegExpr(comp): Urecognized Modifier';
- reeBadLinePairedSeparator: Result := 'TRegExpr(comp): LinePairedSeparator must countain two different chars or no chars at all';
-
- reeRegRepeatCalledInappropriately: Result := 'TRegExpr(exec): RegRepeat Called Inappropriately';
- reeMatchPrimMemoryCorruption: Result := 'TRegExpr(exec): MatchPrim Memory Corruption';
- reeMatchPrimCorruptedPointers: Result := 'TRegExpr(exec): MatchPrim Corrupted Pointers';
- reeNoExpression: Result := 'TRegExpr(exec): Not Assigned Expression Property';
- reeCorruptedProgram: Result := 'TRegExpr(exec): Corrupted Program';
- reeNoInputStringSpecified: Result := 'TRegExpr(exec): No Input String Specified';
- reeOffsetMustBeGreaterThen0: Result := 'TRegExpr(exec): Offset Must Be Greater Then 0';
- reeExecNextWithoutExec: Result := 'TRegExpr(exec): ExecNext Without Exec[Pos]';
- reeGetInputStringWithoutInputString: Result := 'TRegExpr(exec): GetInputString Without InputString';
- reeDumpCorruptedOpcode: Result := 'TRegExpr(dump): Corrupted Opcode';
- reeLoopStackExceeded: Result := 'TRegExpr(exec): Loop Stack Exceeded';
- reeLoopWithoutEntry: Result := 'TRegExpr(exec): Loop Without LoopEntry !';
-
- reeBadPCodeImported: Result := 'TRegExpr(misc): Bad p-code imported';
- else Result := 'Unknown error';
- end;
- end; { of procedure TRegExpr.Error
---------------------------------------------------------------}
+ reeOk:
+ Result := 'No errors';
+ reeCompNullArgument:
+ Result := 'TRegExpr compile: null argument';
+ reeCompParseRegTooManyBrackets:
+ Result := 'TRegExpr compile: ParseReg: too many ()';
+ reeCompParseRegUnmatchedBrackets:
+ Result := 'TRegExpr compile: ParseReg: unmatched ()';
+ reeCompParseRegUnmatchedBrackets2:
+ Result := 'TRegExpr compile: ParseReg: unmatched ()';
+ reeCompParseRegJunkOnEnd:
+ Result := 'TRegExpr compile: ParseReg: junk at end';
+ reePlusStarOperandCouldBeEmpty:
+ Result := 'TRegExpr compile: *+ operand could be empty';
+ reeNestedSQP:
+ Result := 'TRegExpr compile: nested *?+';
+ reeBadHexDigit:
+ Result := 'TRegExpr compile: bad hex digit';
+ reeInvalidRange:
+ Result := 'TRegExpr compile: invalid [] range';
+ reeParseAtomTrailingBackSlash:
+ Result := 'TRegExpr compile: parse atom trailing \';
+ reeNoHexCodeAfterBSlashX:
+ Result := 'TRegExpr compile: no hex code after \x';
+ reeNoLetterAfterBSlashC:
+ Result := 'TRegExpr compile: no letter "A".."Z" after \c';
+ reeMetaCharAfterMinusInRange:
+ Result := 'TRegExpr compile: metachar after "-" in [] range';
+ reeHexCodeAfterBSlashXTooBig:
+ Result := 'TRegExpr compile: hex code after \x is too big';
+ reeUnmatchedSqBrackets:
+ Result := 'TRegExpr compile: unmatched []';
+ reeInternalUrp:
+ Result := 'TRegExpr compile: internal fail on char "|", ")"';
+ reeQPSBFollowsNothing:
+ Result := 'TRegExpr compile: ?+*{ follows nothing';
+ reeTrailingBackSlash:
+ Result := 'TRegExpr compile: trailing \';
+ reeRarseAtomInternalDisaster:
+ Result := 'TRegExpr compile: RarseAtom internal disaster';
+ reeIncorrectBraces:
+ Result := 'TRegExpr compile: incorrect {} braces';
+ reeBRACESArgTooBig:
+ Result := 'TRegExpr compile: braces {} argument too big';
+ reeUnknownOpcodeInFillFirst:
+ Result := 'TRegExpr compile: unknown opcode in FillFirstCharSet ('+DumpOp(fLastErrorOpcode)+')';
+ reeBracesMinParamGreaterMax:
+ Result := 'TRegExpr compile: braces {} min param greater then max';
+ reeUnclosedComment:
+ Result := 'TRegExpr compile: unclosed (?#comment)';
+ reeComplexBracesNotImplemented:
+ Result := 'TRegExpr compile: if you use braces {} and non-greedy ops *?, +?, ?? for complex cases, enable {$DEFINE ComplexBraces}';
+ reeUnrecognizedModifier:
+ Result := 'TRegExpr compile: unrecognized modifier';
+ reeBadLinePairedSeparator:
+ Result := 'TRegExpr compile: LinePairedSeparator must countain two different chars or be empty';
+
+ reeRegRepeatCalledInappropriately:
+ Result := 'TRegExpr exec: RegRepeat called inappropriately';
+ reeMatchPrimMemoryCorruption:
+ Result := 'TRegExpr exec: MatchPrim memory corruption';
+ reeMatchPrimCorruptedPointers:
+ Result := 'TRegExpr exec: MatchPrim corrupted pointers';
+ reeNoExpression:
+ Result := 'TRegExpr exec: empty expression';
+ reeCorruptedProgram:
+ Result := 'TRegExpr exec: corrupted opcode (no magic byte)';
+ reeNoInputStringSpecified:
+ Result := 'TRegExpr exec: empty input string';
+ reeOffsetMustBePositive:
+ Result := 'TRegExpr exec: offset must be >0';
+ reeExecNextWithoutExec:
+ Result := 'TRegExpr exec: ExecNext without Exec(Pos)';
+ reeBadOpcodeInCharClass:
+ Result := 'TRegExpr exec: invalid opcode in char class';
+ reeDumpCorruptedOpcode:
+ Result := 'TRegExpr dump: corrupted opcode';
+ reeLoopStackExceeded:
+ Result := 'TRegExpr exec: loop stack exceeded';
+ reeLoopWithoutEntry:
+ Result := 'TRegExpr exec: loop without loop entry';
+ else
+ Result := 'Unknown error';
+ end;
+end; { of procedure TRegExpr.Error
+ -------------------------------------------------------------- }
-function TRegExpr.LastError : integer;
- begin
+function TRegExpr.LastError: integer;
+begin
Result := fLastError;
fLastError := reeOk;
- end; { of function TRegExpr.LastError
---------------------------------------------------------------}
+end; { of function TRegExpr.LastError
+ -------------------------------------------------------------- }
+{ ============================================================= }
+{ ===================== Common section ======================== }
+{ ============================================================= }
-{=============================================================}
-{===================== Common section ========================}
-{=============================================================}
-
-class function TRegExpr.VersionMajor : integer; //###0.944
- begin
- Result := TRegExprVersionMajor;
- end; { of class function TRegExpr.VersionMajor
---------------------------------------------------------------}
+class function TRegExpr.VersionMajor: integer;
+begin
+ Result := REVersionMajor;
+end;
-class function TRegExpr.VersionMinor : integer; //###0.944
- begin
- Result := TRegExprVersionMinor;
- end; { of class function TRegExpr.VersionMinor
---------------------------------------------------------------}
+class function TRegExpr.VersionMinor: integer;
+begin
+ Result := REVersionMinor;
+end;
constructor TRegExpr.Create;
- begin
+begin
inherited;
programm := nil;
- fExpression := nil;
- fInputString := nil;
+ fExpression := '';
+ fInputString := '';
- regexpbeg := nil;
- fExprIsCompiled := false;
- {$IFDEF UnicodeWordDetection}
- FUseUnicodeWordDetection:=False;
- {$ENDIF}
+ regexpBegin := nil;
+ regexpIsCompiled := False;
+ FillChar(fModifiers, SIzeOf(fModifiers), 0);
ModifierI := RegExprModifierI;
ModifierR := RegExprModifierR;
ModifierS := RegExprModifierS;
ModifierG := RegExprModifierG;
- ModifierM := RegExprModifierM; //###0.940
+ ModifierM := RegExprModifierM;
+ ModifierX := RegExprModifierX;
- SpaceChars := RegExprSpaceChars; //###0.927
- WordChars := RegExprWordChars; //###0.929
- fInvertCase := RegExprInvertCaseFunction; //###0.927
+ SpaceChars := RegExprSpaceChars; // ###0.927
+ WordChars := RegExprWordChars; // ###0.929
+ fInvertCase := RegExprInvertCaseFunction; // ###0.927
- fLineSeparators := RegExprLineSeparators; //###0.941
- LinePairedSeparator := RegExprLinePairedSeparator; //###0.941
+ fLineSeparators := RegExprLineSeparators; // ###0.941
+ LinePairedSeparator := RegExprLinePairedSeparator; // ###0.941
- FUseOsLineEndOnReplace:=True;
- FReplaceLineEnd:=sLineBreak;
- end; { of constructor TRegExpr.Create
---------------------------------------------------------------}
+ FUseOsLineEndOnReplace := True;
+ FReplaceLineEnd := sLineBreak;
+
+ {$IFDEF UnicodeWordDetection}
+ FUseUnicodeWordDetection := True;
+ {$ENDIF}
+
+ fSlowChecksSizeMax := 2000;
+
+ InitLineSepArray;
+ InitCharCheckers;
+end; { of constructor TRegExpr.Create
+ -------------------------------------------------------------- }
-constructor TRegExpr.Create(AExpression:string);
+
+constructor TRegExpr.Create(const AExpression: RegExprString);
begin
- create;
- Expression:=AExpression;
+ Create;
+ Expression := AExpression;
end;
+
destructor TRegExpr.Destroy;
- begin
+begin
if programm <> nil then
begin
- FreeMem (programm);
- programm:=nil;
+ FreeMem(programm);
+ programm := nil;
end;
- if fExpression <> nil then
+end; { of destructor TRegExpr.Destroy
+ -------------------------------------------------------------- }
+
+class function TRegExpr.InvertCaseFunction(const Ch: REChar): REChar;
+begin
+ Result := Ch;
+ if (Ch >= 'a') and (Ch <= 'z') then
begin
- FreeMem (fExpression);
- fExpression:=nil;
+ Dec(Result, 32);
+ Exit;
end;
- if fInputString <> nil then
+ if (Ch >= 'A') and (Ch <= 'Z') then
begin
- FreeMem (fInputString);
- fInputString:=nil;
+ Inc(Result, 32);
+ Exit;
end;
- end; { of destructor TRegExpr.Destroy
---------------------------------------------------------------}
+ if Ord(Ch) < 128 then
+ Exit;
-{$IFDEF UNICODE}
-function AnsiUpperCase(const s: RegExprString): RegExprString;inline;
+ Result := _UpperCase(Ch);
+ if Result = Ch then
+ Result := _LowerCase(Ch);
+ Result := _UpperCase(Ch);
+ if Result = Ch then
+ Result := _LowerCase(Ch);
+end; { of function TRegExpr.InvertCaseFunction
+ -------------------------------------------------------------- }
+procedure TRegExpr.SetExpression(const AStr: RegExprString);
begin
- Result:=WideUpperCase(S);
-end;
-
-function AnsiLowerCase(const s: RegExprString): RegExprString;inline;
+ if (AStr <> fExpression) or not regexpIsCompiled then
+ begin
+ regexpIsCompiled := False;
+ fExpression := AStr;
+ UniqueString(fExpression);
+ fRegexStart := PRegExprChar(fExpression);
+ fRegexEnd := fRegexStart + Length(fExpression);
+ InvalidateProgramm; // ###0.941
+ end;
+end; { of procedure TRegExpr.SetExpression
+ -------------------------------------------------------------- }
+function TRegExpr.GetSubExprCount: integer;
begin
- Result:=WideLowerCase(S);
+ // if nothing found, we must return -1 per TRegExpr docs
+ if startp[0] = nil then
+ Result := -1
+ else
+ Result := GrpCount;
end;
-{$ENDIF}
-class function TRegExpr.InvertCaseFunction (const Ch : REChar) : REChar;
+function TRegExpr.GetMatchPos(Idx: integer): PtrInt;
begin
- Result := AnsiUpperCase(Ch)[1];
- if Result = Ch then
- Result := AnsiLowerCase(Ch)[1];
-end; { of function TRegExpr.InvertCaseFunction
---------------------------------------------------------------}
-
-function TRegExpr.GetExpression : RegExprString;
- begin
- if fExpression <> nil
- then Result := fExpression
- else Result := '';
- end; { of function TRegExpr.GetExpression
---------------------------------------------------------------}
+ Idx := GrpIndexes[Idx];
+ if (Idx >= 0) and (startp[Idx] <> nil) then
+ Result := startp[Idx] - fInputStart + 1
+ else
+ Result := -1;
+end; { of function TRegExpr.GetMatchPos
+ -------------------------------------------------------------- }
-procedure TRegExpr.SetExpression (const s : RegExprString);
- var
- Len : PtrInt; //###0.950
- begin
- if (s <> fExpression) or not fExprIsCompiled then begin
- fExprIsCompiled := false;
- if fExpression <> nil then begin
- FreeMem (fExpression);
- fExpression := nil;
- end;
- if s <> '' then begin
- Len := length (s); //###0.950
- GetMem (fExpression, (Len + 1) * SizeOf (REChar));
- System.Move(s[1],fExpression^,(Len + 1) * SizeOf (REChar));
-
- InvalidateProgramm; //###0.941
- end;
- end;
- end; { of procedure TRegExpr.SetExpression
---------------------------------------------------------------}
+function TRegExpr.GetMatchLen(Idx: integer): PtrInt;
+begin
+ Idx := GrpIndexes[Idx];
+ if (Idx >= 0) and (startp[Idx] <> nil) then
+ Result := endp[Idx] - startp[Idx]
+ else
+ Result := -1;
+end; { of function TRegExpr.GetMatchLen
+ -------------------------------------------------------------- }
-function TRegExpr.GetSubExprMatchCount : integer;
- begin
- if Assigned (fInputString) then begin
- Result := NSUBEXP - 1;
- while (Result > 0) and ((startp [Result] = nil)
- or (endp [Result] = nil))
- do dec (Result);
- end
- else Result := -1;
- end; { of function TRegExpr.GetSubExprMatchCount
---------------------------------------------------------------}
+function TRegExpr.GetMatch(Idx: integer): RegExprString;
+begin
+ Result := '';
+ Idx := GrpIndexes[Idx];
+ if (Idx >= 0) and (endp[Idx] > startp[Idx]) then
+ SetString(Result, startp[Idx], endp[Idx] - startp[Idx]);
+ {
+ // then Result := copy (fInputString, MatchPos [Idx], MatchLen [Idx]) //###0.929
+ then
+ begin
+ SetLength(Result, endp[Idx] - startp[Idx]);
+ System.Move(startp[Idx]^, Result[1], Length(Result) * SizeOf(REChar));
+ end;
+ }
+end; { of function TRegExpr.GetMatch
+ -------------------------------------------------------------- }
-function TRegExpr.GetMatchPos (Idx : integer) : PtrInt;
- begin
- if (Idx >= 0) and (Idx < NSUBEXP) and Assigned (fInputString)
- and Assigned (startp [Idx]) and Assigned (endp [Idx]) then begin
- Result := (startp [Idx] - fInputString) + 1;
- end
- else Result := -1;
- end; { of function TRegExpr.GetMatchPos
---------------------------------------------------------------}
+function TRegExpr.GetModifierStr: RegExprString;
+begin
+ Result := '-';
-function TRegExpr.GetMatchLen (Idx : integer) : PtrInt;
- begin
- if (Idx >= 0) and (Idx < NSUBEXP) and Assigned (fInputString)
- and Assigned (startp [Idx]) and Assigned (endp [Idx]) then begin
- Result := endp [Idx] - startp [Idx];
- end
- else Result := -1;
- end; { of function TRegExpr.GetMatchLen
---------------------------------------------------------------}
+ if ModifierI then
+ Result := 'i' + Result
+ else
+ Result := Result + 'i';
+ if ModifierR then
+ Result := 'r' + Result
+ else
+ Result := Result + 'r';
+ if ModifierS then
+ Result := 's' + Result
+ else
+ Result := Result + 's';
+ if ModifierG then
+ Result := 'g' + Result
+ else
+ Result := Result + 'g';
+ if ModifierM then
+ Result := 'm' + Result
+ else
+ Result := Result + 'm';
+ if ModifierX then
+ Result := 'x' + Result
+ else
+ Result := Result + 'x';
-function TRegExpr.GetMatch (Idx : integer) : RegExprString;
- begin
- Result:='';
- if (Idx >= 0) and (Idx < NSUBEXP) and Assigned (fInputString)
- and Assigned (startp [Idx]) and Assigned (endp [Idx])
- and (endp [Idx] > startp[Idx])
- //then Result := copy (fInputString, MatchPos [Idx], MatchLen [Idx]) //###0.929
- then begin
- //SetString (Result, startp [idx], endp [idx] - startp [idx])
- SetLength(Result,endp [idx] - startp [idx]);
- System.Move(startp [idx]^,Result[1],length(Result)*sizeof(REChar));
- end
- else Result := '';
- end; { of function TRegExpr.GetMatch
---------------------------------------------------------------}
+ if Result[Length(Result)] = '-' // remove '-' if all modifiers are 'On'
+ then
+ System.Delete(Result, Length(Result), 1);
+end; { of function TRegExpr.GetModifierStr
+ -------------------------------------------------------------- }
-function TRegExpr.GetModifierStr : RegExprString;
- begin
- Result := '-';
+procedure TRegExpr.SetModifierG(AValue: boolean);
+begin
+ fModifiers.G := AValue;
+end;
- if ModifierI
- then Result := 'i' + Result
- else Result := Result + 'i';
- if ModifierR
- then Result := 'r' + Result
- else Result := Result + 'r';
- if ModifierS
- then Result := 's' + Result
- else Result := Result + 's';
- if ModifierG
- then Result := 'g' + Result
- else Result := Result + 'g';
- if ModifierM
- then Result := 'm' + Result
- else Result := Result + 'm';
- if ModifierX
- then Result := 'x' + Result
- else Result := Result + 'x';
-
- if Result [length (Result)] = '-' // remove '-' if all modifiers are 'On'
- then System.Delete (Result, length (Result), 1);
- end; { of function TRegExpr.GetModifierStr
---------------------------------------------------------------}
+procedure TRegExpr.SetModifierI(AValue: boolean);
+begin
+ fModifiers.I := AValue;
+end;
-class function TRegExpr.ParseModifiersStr (const AModifiers : RegExprString;
-var AModifiersInt : integer) : boolean;
-// !!! Be carefull - this is class function and must not use object instance fields
- var
- i : integer;
- IsOn : boolean;
- Mask : integer;
- begin
- Result := true;
- IsOn := true;
- Mask := 0; // prevent compiler warning
- for i := 1 to length (AModifiers) do
- if AModifiers [i] = '-'
- then IsOn := false
- else begin
- if Pos (AModifiers [i], 'iI') > 0
- then Mask := MaskModI
- else if Pos (AModifiers [i], 'rR') > 0
- then Mask := MaskModR
- else if Pos (AModifiers [i], 'sS') > 0
- then Mask := MaskModS
- else if Pos (AModifiers [i], 'gG') > 0
- then Mask := MaskModG
- else if Pos (AModifiers [i], 'mM') > 0
- then Mask := MaskModM
- else if Pos (AModifiers [i], 'xX') > 0
- then Mask := MaskModX
- else begin
- Result := false;
- EXIT;
- end;
- if IsOn
- then AModifiersInt := AModifiersInt or Mask
- else AModifiersInt := AModifiersInt and not Mask;
- end;
- end; { of function TRegExpr.ParseModifiersStr
---------------------------------------------------------------}
+procedure TRegExpr.SetModifierM(AValue: boolean);
+begin
+ fModifiers.M := AValue;
+end;
-procedure TRegExpr.SetModifierStr (const AModifiers : RegExprString);
- begin
- if not ParseModifiersStr (AModifiers, fModifiers)
- then Error (reeModifierUnsupported);
- end; { of procedure TRegExpr.SetModifierStr
---------------------------------------------------------------}
+procedure TRegExpr.SetModifierR(AValue: boolean);
+begin
+ fModifiers.R := AValue;
+end;
-function TRegExpr.GetModifier (AIndex : integer) : boolean;
- var
- Mask : integer;
- begin
- Result := false;
- case AIndex of
- 1: Mask := MaskModI;
- 2: Mask := MaskModR;
- 3: Mask := MaskModS;
- 4: Mask := MaskModG;
- 5: Mask := MaskModM;
- 6: Mask := MaskModX;
- else begin
- Error (reeModifierUnsupported);
- EXIT;
- end;
- end;
- Result := (fModifiers and Mask) <> 0;
- end; { of function TRegExpr.GetModifier
---------------------------------------------------------------}
+procedure TRegExpr.SetModifierS(AValue: boolean);
+begin
+ fModifiers.S := AValue;
+end;
-procedure TRegExpr.SetModifier (AIndex : integer; ASet : boolean);
- var
- Mask : integer;
- begin
- case AIndex of
- 1: Mask := MaskModI;
- 2: Mask := MaskModR;
- 3: Mask := MaskModS;
- 4: Mask := MaskModG;
- 5: Mask := MaskModM;
- 6: Mask := MaskModX;
- else begin
- Error (reeModifierUnsupported);
- EXIT;
- end;
- end;
- if ASet
- then fModifiers := fModifiers or Mask
- else fModifiers := fModifiers and not Mask;
- end; { of procedure TRegExpr.SetModifier
---------------------------------------------------------------}
+procedure TRegExpr.SetModifierX(AValue: boolean);
+begin
+ fModifiers.X := AValue;
+end;
+procedure TRegExpr.SetModifierStr(const AStr: RegExprString);
+begin
+ if not ParseModifiers(PRegExprChar(AStr), Length(AStr), fModifiers) then
+ Error(reeModifierUnsupported);
+end; { of procedure TRegExpr.SetModifierStr
+ -------------------------------------------------------------- }
-{=============================================================}
-{==================== Compiler section =======================}
-{=============================================================}
+{ ============================================================= }
+{ ==================== Compiler section ======================= }
+{ ============================================================= }
{$IFDEF UnicodeWordDetection}
-function TRegExpr.IsUnicodeWordChar(AChar: REChar): Boolean;
-var
- NType: byte;
-begin
- if Ord(AChar)<128 then
- exit(false)
- else
- if Ord(AChar)>=LOW_SURROGATE_BEGIN then
- exit(false)
- else
+ {$IFDEF FPC}
+ function IsUnicodeWordChar(AChar: WideChar): boolean; inline;
+ var
+ NType: byte;
begin
- NType:= GetProps(Ord(AChar))^.Category;
- Result:= (NType<=UGC_OtherNumber);
+ if Ord(AChar) >= LOW_SURROGATE_BEGIN then
+ Exit(False);
+ NType := GetProps(Ord(AChar))^.Category;
+ Result := (NType <= UGC_OtherNumber);
end;
-end;
+ {$ELSE}
+ function IsUnicodeWordChar(AChar: WideChar): boolean; inline;
+ begin
+ Result := System.Character.IsLetterOrDigit(AChar);
+ end;
+ {$ENDIF}
{$ENDIF}
-
-function TRegExpr.IsWordChar(AChar: REChar): Boolean; inline;
+function TRegExpr.IsWordChar(AChar: REChar): boolean;
begin
- Result := Pos(AChar, fWordChars)>0;
+ Result := Pos(AChar, fWordChars) > 0;
{$IFDEF UnicodeWordDetection}
- If Not Result and UseUnicodeWordDetection then
- Result:=IsUnicodeWordChar(aChar);
+ if not Result and (Ord(AChar) >= 128) and UseUnicodeWordDetection then
+ Result := IsUnicodeWordChar(AChar);
+ {$ENDIF}
+end;
+
+function TRegExpr.IsSpaceChar(AChar: REChar): boolean;
+begin
+ Result := Pos(AChar, fSpaceChars) > 0;
+end;
+
+function TRegExpr.IsCustomLineSeparator(AChar: REChar): boolean;
+begin
+ {$IFDEF UniCode}
+ Result := Pos(AChar, fLineSeparators) > 0;
+ {$ELSE}
+ Result := fLineSepArray[byte(AChar)];
{$ENDIF}
end;
+function IsDigitChar(AChar: REChar): boolean; inline;
+begin
+ case AChar of
+ '0' .. '9':
+ Result := True;
+ else
+ Result := False;
+ end;
+end;
-function TRegExpr.IsSpaceChar(AChar: PRegExprChar): Boolean;
+function IsHorzSeparator(AChar: REChar): boolean; inline;
begin
- Result:=Pos(AChar^,fSpaceChars)>0;
+ // Tab and Unicode categoty "Space Separator": https://www.compart.com/en/unicode/category/Zs
+ case AChar of
+ #9, #$20, #$A0:
+ Result := True;
+ {$IFDEF UniCode}
+ #$1680, #$2000 .. #$200A, #$202F, #$205F, #$3000:
+ Result := True;
+ {$ENDIF}
+ else
+ Result := False;
+ end;
end;
-function TRegExpr.IsDigit(AChar: PRegExprChar): Boolean;
+function IsLineSeparator(AChar: REChar): boolean; inline;
begin
- // Avoid Unicode char-> ansi char conversion in case of unicode regexp.
- Result:=Ord(AChar^) in [Ord('0')..Ord('9')]
+ case AChar of
+ #$d, #$a, #$b, #$c:
+ Result := True;
+ {$IFDEF UniCode}
+ #$2028, #$2029, #$85:
+ Result := True;
+ {$ENDIF}
+ else
+ Result := False;
+ end;
end;
procedure TRegExpr.InvalidateProgramm;
- begin
- if programm <> nil then begin
- FreeMem (programm);
+begin
+ if programm <> nil then
+ begin
+ FreeMem(programm);
programm := nil;
- end;
- end; { of procedure TRegExpr.InvalidateProgramm
---------------------------------------------------------------}
-
-procedure TRegExpr.Compile; //###0.941
- begin
- if fExpression = nil then begin // No Expression assigned
- Error (reeNoExpression);
- EXIT;
- end;
- CompileRegExpr (fExpression);
- end; { of procedure TRegExpr.Compile
---------------------------------------------------------------}
+ end;
+end; { of procedure TRegExpr.InvalidateProgramm
+ -------------------------------------------------------------- }
-function TRegExpr.IsProgrammOk : boolean;
- {$IFNDEF UniCode}
- var
- i : integer;
- {$ENDIF}
- begin
- Result := false;
+procedure TRegExpr.Compile;
+begin
+ if fExpression = '' then
+ begin
+ Error(reeNoExpression);
+ Exit;
+ end;
- // check modifiers
- if fModifiers <> fProgModifiers //###0.941
- then InvalidateProgramm;
+ CompileRegExpr(PRegExprChar(fExpression));
+end; { of procedure TRegExpr.Compile
+ -------------------------------------------------------------- }
- // can we optimize line separators by using sets?
+procedure TRegExpr.InitLineSepArray;
+{$IFNDEF UniCode}
+var
+ i: integer;
+{$ENDIF}
+begin
{$IFNDEF UniCode}
- fLineSeparatorsSet := [];
- for i := 1 to length (fLineSeparators)
- do System.Include (fLineSeparatorsSet, fLineSeparators [i]);
+ FillChar(fLineSepArray, SizeOf(fLineSepArray), 0);
+ for i := 1 to Length(fLineSeparators) do
+ fLineSepArray[byte(fLineSeparators[i])] := True;
{$ENDIF}
+end;
+
+function TRegExpr.IsProgrammOk: boolean;
+begin
+ Result := False;
+
+ // check modifiers
+ if not IsModifiersEqual(fModifiers, fProgModifiers) // ###0.941
+ then
+ InvalidateProgramm;
// [Re]compile if needed
- if programm = nil
- then Compile; //###0.941
-
- // check [re]compiled programm
- if programm = nil
- then EXIT // error was set/raised by Compile (was reeExecAfterCompErr)
- else if programm [0] <> MAGIC // Program corrupted.
- then Error (reeCorruptedProgram)
- else Result := true;
- end; { of function TRegExpr.IsProgrammOk
---------------------------------------------------------------}
+ if programm = nil then
+ begin
+ Compile; // ###0.941
+ // Check [re]compiled programm
+ if programm = nil then
+ Exit; // error was set/raised by Compile (was reeExecAfterCompErr)
+ end;
-procedure TRegExpr.Tail (p : PRegExprChar; val : PRegExprChar);
+ if programm[0] <> OP_MAGIC // Program corrupted.
+ then
+ Error(reeCorruptedProgram)
+ else
+ Result := True;
+end; { of function TRegExpr.IsProgrammOk
+ -------------------------------------------------------------- }
+
+procedure TRegExpr.Tail(p: PRegExprChar; val: PRegExprChar);
// set the next-pointer at the end of a node chain
- var
- scan : PRegExprChar;
- temp : PRegExprChar;
-// i : int64;
- begin
- if p = @regdummy
- then EXIT;
+var
+ scan: PRegExprChar;
+ temp: PRegExprChar;
+begin
+ if p = @regdummy then
+ Exit;
// Find last node.
scan := p;
- REPEAT
- temp := regnext (scan);
- if temp = nil
- then BREAK;
- scan := temp;
- UNTIL false;
+ repeat
+ temp := regnext(scan);
+ if temp = nil then
+ Break;
+ scan := temp;
+ until False;
// Set Next 'pointer'
- if val < scan
- then PRENextOff (AlignToPtr(scan + REOpSz))^ := - (scan - val) //###0.948
- // work around PWideChar subtraction bug (Delphi uses
- // shr after subtraction to calculate widechar distance %-( )
- // so, if difference is negative we have .. the "feature" :(
- // I could wrap it in $IFDEF UniCode, but I didn't because
- // "P – Q computes the difference between the address given
- // by P (the higher address) and the address given by Q (the
- // lower address)" - Delphi help quotation.
- else PRENextOff (AlignToPtr(scan + REOpSz))^ := val - scan; //###0.933
- end; { of procedure TRegExpr.Tail
---------------------------------------------------------------}
+ if val < scan then
+ PRENextOff(AlignToPtr(scan + REOpSz))^ := -(scan - val) // ###0.948
+ // work around PWideChar subtraction bug (Delphi uses
+ // shr after subtraction to calculate widechar distance %-( )
+ // so, if difference is negative we have .. the "feature" :(
+ // I could wrap it in $IFDEF UniCode, but I didn't because
+ // "P – Q computes the difference between the address given
+ // by P (the higher address) and the address given by Q (the
+ // lower address)" - Delphi help quotation.
+ else
+ PRENextOff(AlignToPtr(scan + REOpSz))^ := val - scan; // ###0.933
+end; { of procedure TRegExpr.Tail
+ -------------------------------------------------------------- }
-procedure TRegExpr.OpTail (p : PRegExprChar; val : PRegExprChar);
+procedure TRegExpr.OpTail(p: PRegExprChar; val: PRegExprChar);
// regtail on operand of first argument; nop if operandless
- begin
- // "Operandless" and "op != BRANCH" are synonymous in practice.
- if (p = nil) or (p = @regdummy) or (PREOp (p)^ <> BRANCH)
- then EXIT;
- Tail (p + REOpSz + RENextOffSz, val); //###0.933
- end; { of procedure TRegExpr.OpTail
---------------------------------------------------------------}
-
-function TRegExpr.EmitNode (op : TREOp) : PRegExprChar; //###0.933
+begin
+ // "Operandless" and "op != OP_BRANCH" are synonymous in practice.
+ if (p = nil) or (p = @regdummy) or (PREOp(p)^ <> OP_BRANCH) then
+ Exit;
+ Tail(p + REOpSz + RENextOffSz, val); // ###0.933
+end; { of procedure TRegExpr.OpTail
+ -------------------------------------------------------------- }
+
+function TRegExpr.EmitNode(op: TREOp): PRegExprChar; // ###0.933
// emit a node, return location
- begin
+begin
Result := regcode;
- if Result <> @regdummy then begin
- PREOp (regcode)^ := op;
- inc (regcode, REOpSz);
- PRENextOff (AlignToPtr(regcode))^ := 0; // Next "pointer" := nil
- inc (regcode, RENextOffSz);
- {$IFDEF DebugSynRegExpr}
- if regcode-programm>regsize then
- raise Exception.Create('TRegExpr.EmitNode buffer overrun');
- {$ENDIF}
- end
- else inc (regsize, REOpSz + RENextOffSz); // compute code size without code generation
- end; { of function TRegExpr.EmitNode
---------------------------------------------------------------}
+ if Result <> @regdummy then
+ begin
+ PREOp(regcode)^ := op;
+ Inc(regcode, REOpSz);
+ PRENextOff(AlignToPtr(regcode))^ := 0; // Next "pointer" := nil
+ Inc(regcode, RENextOffSz);
-procedure TRegExpr.EmitC (b : REChar);
-// emit a byte to code
- begin
- if regcode <> @regdummy then begin
- regcode^ := b;
- inc (regcode);
- {$IFDEF DebugSynRegExpr}
- if regcode-programm>regsize then
- raise Exception.Create('TRegExpr.EmitC buffer overrun');
- {$ENDIF}
- end
- else inc (regsize, REOpSz); // Type of p-code pointer always is ^REChar
- end; { of procedure TRegExpr.EmitC
---------------------------------------------------------------}
+ if (op = OP_EXACTLY) or (op = OP_EXACTLYCI) then
+ regExactlyLen := PLongInt(regcode)
+ else
+ regExactlyLen := nil;
+
+ {$IFDEF DebugSynRegExpr}
+ if regcode - programm > regsize then
+ raise Exception.Create('TRegExpr.EmitNode buffer overrun');
+ {$ENDIF}
+ end
+ else
+ Inc(regsize, REOpSz + RENextOffSz);
+ // compute code size without code generation
+end; { of function TRegExpr.EmitNode
+ -------------------------------------------------------------- }
+
+procedure TRegExpr.EmitC(ch: REChar); {$IFDEF InlineFuncs}inline;{$ENDIF}
+begin
+ if regcode <> @regdummy then
+ begin
+ regcode^ := ch;
+ Inc(regcode);
+ {$IFDEF DebugSynRegExpr}
+ if regcode - programm > regsize then
+ raise Exception.Create('TRegExpr.EmitC buffer overrun');
+ {$ENDIF}
+ end
+ else
+ Inc(regsize, REOpSz); // Type of p-code pointer always is ^REChar
+end; { of procedure TRegExpr.EmitC
+ -------------------------------------------------------------- }
-procedure TRegExpr.InsertOperator (op : TREOp; opnd : PRegExprChar; sz : integer);
+procedure TRegExpr.EmitInt(AValue: LongInt); {$IFDEF InlineFuncs}inline;{$ENDIF}
+begin
+ if regcode <> @regdummy then
+ begin
+ PLongInt(regcode)^ := AValue;
+ Inc(regcode, RENumberSz);
+ {$IFDEF DebugSynRegExpr}
+ if regcode - programm > regsize then
+ raise Exception.Create('TRegExpr.EmitInt buffer overrun');
+ {$ENDIF}
+ end
+ else
+ Inc(regsize, RENumberSz);
+end;
+
+procedure TRegExpr.InsertOperator(op: TREOp; opnd: PRegExprChar; sz: integer);
// insert an operator in front of already-emitted operand
// Means relocating the operand.
- var
- src, dst, place : PRegExprChar;
- i : integer;
- begin
- if regcode = @regdummy then begin
- inc (regsize, sz);
- EXIT;
- end;
+var
+ src, dst, place: PRegExprChar;
+ i: integer;
+begin
+ if regcode = @regdummy then
+ begin
+ Inc(regsize, sz);
+ Exit;
+ end;
// move code behind insert position
src := regcode;
- inc (regcode, sz);
+ Inc(regcode, sz);
{$IFDEF DebugSynRegExpr}
- if regcode-programm>regsize then
+ if regcode - programm > regsize then
raise Exception.Create('TRegExpr.InsertOperator buffer overrun');
-// if (opnd<regcode) or (opnd-regcode>regsize) then
- // raise Exception.Create('TRegExpr.InsertOperator invalid opnd');
+ // if (opnd<regcode) or (opnd-regcode>regsize) then
+ // raise Exception.Create('TRegExpr.InsertOperator invalid opnd');
{$ENDIF}
dst := regcode;
- while src > opnd do begin
- dec (dst);
- dec (src);
+ while src > opnd do
+ begin
+ Dec(dst);
+ Dec(src);
dst^ := src^;
- end;
+ end;
place := opnd; // Op node, where operand used to be.
- PREOp (place)^ := op;
- inc (place, REOpSz);
- for i := 1 + REOpSz to sz do begin
+ PREOp(place)^ := op;
+ Inc(place, REOpSz);
+ for i := 1 + REOpSz to sz do
+ begin
place^ := #0;
- inc (place);
- end;
- end; { of procedure TRegExpr.InsertOperator
---------------------------------------------------------------}
+ Inc(place);
+ end;
+end; { of procedure TRegExpr.InsertOperator
+ -------------------------------------------------------------- }
-function strcspn (s1 : PRegExprChar; s2 : PRegExprChar) : PtrInt;
-// find length of initial segment of s1 consisting
-// entirely of characters not from s2
- var scan1, scan2 : PRegExprChar;
- begin
+function FindSkippedMetaLen(PStart, PEnd: PRegExprChar): integer; {$IFDEF InlineFuncs}inline;{$ENDIF}
+// find length of initial segment of PStart string consisting
+// entirely of characters not from IsMetaSymbol1.
+begin
Result := 0;
- scan1 := s1;
- while scan1^ <> #0 do begin
- scan2 := s2;
- while scan2^ <> #0 do
- if scan1^ = scan2^
- then EXIT
- else inc (scan2);
- inc (Result);
- inc (scan1)
- end;
- end; { of function strcspn
---------------------------------------------------------------}
+ while PStart < PEnd do
+ begin
+ if _IsMetaSymbol1(PStart^) then
+ Exit;
+ Inc(Result);
+ Inc(PStart)
+ end;
+end;
const
-// Flags to be passed up and down.
- HASWIDTH = 01; // Known never to match nil string.
- SIMPLE = 02; // Simple enough to be STAR/PLUS/BRACES operand.
- SPSTART = 04; // Starts with * or +.
- WORST = 0; // Worst case.
- META : array [0 .. 12] of REChar = (
- '^', '$', '.', '[', '(', ')', '|', '?', '+', '*', EscChar, '{', #0);
- // Any modification must be synchronized with QuoteRegExprMetaChars !!!
-
-{$IFDEF UniCode}
- RusRangeLo : array [0 .. 33] of REChar =
- (#$430,#$431,#$432,#$433,#$434,#$435,#$451,#$436,#$437,
- #$438,#$439,#$43A,#$43B,#$43C,#$43D,#$43E,#$43F,
- #$440,#$441,#$442,#$443,#$444,#$445,#$446,#$447,
- #$448,#$449,#$44A,#$44B,#$44C,#$44D,#$44E,#$44F,#0);
- RusRangeHi : array [0 .. 33] of REChar =
- (#$410,#$411,#$412,#$413,#$414,#$415,#$401,#$416,#$417,
- #$418,#$419,#$41A,#$41B,#$41C,#$41D,#$41E,#$41F,
- #$420,#$421,#$422,#$423,#$424,#$425,#$426,#$427,
- #$428,#$429,#$42A,#$42B,#$42C,#$42D,#$42E,#$42F,#0);
- RusRangeLoLow = #$430{'а'};
- RusRangeLoHigh = #$44F{'я'};
- RusRangeHiLow = #$410{'А'};
- RusRangeHiHigh = #$42F{'Я'};
-{$ELSE}
- RusRangeLo = 'абвгдеёжзийклмнопрстуфхцчшщъыьэюя';
- RusRangeHi = 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ';
- RusRangeLoLow = 'а';
- RusRangeLoHigh = 'я';
- RusRangeHiLow = 'А';
- RusRangeHiHigh = 'Я';
-{$ENDIF}
+ // Flags to be passed up and down.
+ flag_HasWidth = 01; // Known never to match nil string.
+ flag_Simple = 02; // Simple enough to be OP_STAR/OP_PLUS/OP_BRACES operand.
+ flag_SpecStart = 04; // Starts with * or +.
+ flag_Worst = 0; // Worst case.
+
+ {$IFDEF UniCode}
+ RusRangeLoLow = #$430; // 'а'
+ RusRangeLoHigh = #$44F; // 'я'
+ RusRangeHiLow = #$410; // 'А'
+ RusRangeHiHigh = #$42F; // 'Я'
+ {$ELSE}
+ RusRangeLoLow = #$E0; // 'а' in cp1251
+ RusRangeLoHigh = #$FF; // 'я' in cp1251
+ RusRangeHiLow = #$C0; // 'А' in cp1251
+ RusRangeHiHigh = #$DF; // 'Я' in cp1251
+ {$ENDIF}
-function TRegExpr.CompileRegExpr (exp : PRegExprChar) : boolean;
+function TRegExpr.FindInCharClass(ABuffer: PRegExprChar; AChar: REChar; AIgnoreCase: boolean): boolean;
+// Buffer contains char pairs: (Kind, Data), where Kind is one of OpKind_ values,
+// and Data depends on Kind
+var
+ ch, ch2: REChar;
+ N, i: integer;
+begin
+ if AIgnoreCase then
+ AChar := _UpperCase(AChar);
+ repeat
+ case ABuffer^ of
+ OpKind_End:
+ begin
+ Result := False;
+ Exit;
+ end;
+
+ OpKind_Range:
+ begin
+ Inc(ABuffer);
+ ch := ABuffer^;
+ Inc(ABuffer);
+ ch2 := ABuffer^;
+ Inc(ABuffer);
+ {
+ // if AIgnoreCase, ch, ch2 are upcased in opcode
+ if AIgnoreCase then
+ begin
+ ch := _UpperCase(ch);
+ ch2 := _UpperCase(ch2);
+ end;
+ }
+ if (AChar >= ch) and (AChar <= ch2) then
+ begin
+ Result := True;
+ Exit;
+ end;
+ end;
+
+ OpKind_MetaClass:
+ begin
+ Inc(ABuffer);
+ N := Ord(ABuffer^);
+ Inc(ABuffer);
+ if CharCheckers[N](AChar) then
+ begin
+ Result := True;
+ Exit
+ end;
+ end;
+
+ OpKind_Char:
+ begin
+ Inc(ABuffer);
+ N := PLongInt(ABuffer)^;
+ Inc(ABuffer, RENumberSz);
+ for i := 1 to N do
+ begin
+ ch := ABuffer^;
+ Inc(ABuffer);
+ {
+ // already upcased in opcode
+ if AIgnoreCase then
+ ch := _UpperCase(ch);
+ }
+ if ch = AChar then
+ begin
+ Result := True;
+ Exit;
+ end;
+ end;
+ end;
+
+ else
+ Error(reeBadOpcodeInCharClass);
+ end;
+ until False; // assume that Buffer is ended correctly
+end;
+
+
+procedure TRegExpr.GetCharSetFromWordChars(var ARes: TRegExprCharset);
+var
+ i: integer;
+ ch: REChar;
+begin
+ ARes := [];
+ for i := 1 to Length(fWordChars) do
+ begin
+ ch := fWordChars[i];
+ {$IFDEF UniCode}
+ if Ord(ch) <= $FF then
+ {$ENDIF}
+ Include(ARes, byte(ch));
+ end;
+end;
+
+procedure TRegExpr.GetCharSetFromSpaceChars(var ARes: TRegExprCharset);
+
+var
+ i: integer;
+ ch: REChar;
+
+begin
+ ARes := [];
+ for i := 1 to Length(fSpaceChars) do
+ begin
+ ch := fSpaceChars[i];
+ {$IFDEF UniCode}
+ if Ord(ch) <= $FF then
+ {$ENDIF}
+ Include(ARes, byte(ch));
+ end;
+end;
+
+procedure TRegExpr.GetCharSetFromCharClass(ABuffer: PRegExprChar; AIgnoreCase: boolean; var ARes: TRegExprCharset);
+var
+ ch, ch2: REChar;
+ TempSet: TRegExprCharSet;
+ N, i: integer;
+begin
+ ARes := [];
+ TempSet := [];
+ repeat
+ case ABuffer^ of
+ OpKind_End:
+ Exit;
+
+ OpKind_Range:
+ begin
+ Inc(ABuffer);
+ ch := ABuffer^;
+ Inc(ABuffer);
+ ch2 := ABuffer^;
+ Inc(ABuffer);
+ for i := Ord(ch) to
+ {$IFDEF UniCode} Min(Ord(ch2), $FF) {$ELSE} Ord(ch2) {$ENDIF} do
+ begin
+ Include(ARes, byte(i));
+ if AIgnoreCase then
+ Include(ARes, byte(InvertCase(REChar(i))));
+ end;
+ end;
+
+ OpKind_MetaClass:
+ begin
+ Inc(ABuffer);
+ N := Ord(ABuffer^);
+ Inc(ABuffer);
+
+ if N = CheckerIndex_Word then
+ begin
+ GetCharSetFromWordChars(TempSet);
+ ARes := ARes + TempSet;
+ end
+ else
+ if N = CheckerIndex_NotWord then
+ begin
+ GetCharSetFromWordChars(TempSet);
+ ARes := ARes + (RegExprAllSet - TempSet);
+ end
+ else
+ if N = CheckerIndex_Space then
+ begin
+ GetCharSetFromSpaceChars(TempSet);
+ ARes := ARes + TempSet;
+ end
+ else
+ if N = CheckerIndex_NotSpace then
+ begin
+ GetCharSetFromSpaceChars(TempSet);
+ ARes := ARes + (RegExprAllSet - TempSet);
+ end
+ else
+ if N = CheckerIndex_Digit then
+ ARes := ARes + RegExprDigitSet
+ else
+ if N = CheckerIndex_NotDigit then
+ ARes := ARes + (RegExprAllSet - RegExprDigitSet)
+ else
+ if N = CheckerIndex_VertSep then
+ ARes := ARes + RegExprLineSeparatorsSet
+ else
+ if N = CheckerIndex_NotVertSep then
+ ARes := ARes + (RegExprAllSet - RegExprLineSeparatorsSet)
+ else
+ if N = CheckerIndex_HorzSep then
+ ARes := ARes + RegExprHorzSeparatorsSet
+ else
+ if N = CheckerIndex_NotHorzSep then
+ ARes := ARes + (RegExprAllSet - RegExprHorzSeparatorsSet)
+ else
+ if N = CheckerIndex_LowerAZ then
+ begin
+ if AIgnoreCase then
+ ARes := ARes + RegExprAllAzSet
+ else
+ ARes := ARes + RegExprLowerAzSet;
+ end
+ else
+ if N = CheckerIndex_UpperAZ then
+ begin
+ if AIgnoreCase then
+ ARes := ARes + RegExprAllAzSet
+ else
+ ARes := ARes + RegExprUpperAzSet;
+ end
+ else
+ Error(reeBadOpcodeInCharClass);
+ end;
+
+ OpKind_Char:
+ begin
+ Inc(ABuffer);
+ N := PLongInt(ABuffer)^;
+ Inc(ABuffer, RENumberSz);
+ for i := 1 to N do
+ begin
+ ch := ABuffer^;
+ Inc(ABuffer);
+ {$IFDEF UniCode}
+ if Ord(ch) <= $FF then
+ {$ENDIF}
+ begin
+ Include(ARes, byte(ch));
+ if AIgnoreCase then
+ Include(ARes, byte(InvertCase(ch)));
+ end;
+ end;
+ end;
+
+ else
+ Error(reeBadOpcodeInCharClass);
+ end;
+ until False; // assume that Buffer is ended correctly
+end;
+
+
+function TRegExpr.GetModifierG: boolean;
+begin
+ Result := fModifiers.G;
+end;
+
+function TRegExpr.GetModifierI: boolean;
+begin
+ Result := fModifiers.I;
+end;
+
+function TRegExpr.GetModifierM: boolean;
+begin
+ Result := fModifiers.M;
+end;
+
+function TRegExpr.GetModifierR: boolean;
+begin
+ Result := fModifiers.R;
+end;
+
+function TRegExpr.GetModifierS: boolean;
+begin
+ Result := fModifiers.S;
+end;
+
+function TRegExpr.GetModifierX: boolean;
+begin
+ Result := fModifiers.X;
+end;
+
+function TRegExpr.CompileRegExpr(ARegExp: PRegExprChar): boolean;
// Compile a regular expression into internal code
// We can't allocate space until we know how big the compiled form will be,
// but we can't compile it (and thus know how big it is) until we've got a
@@ -1679,1194 +2236,1327 @@ function TRegExpr.CompileRegExpr (exp : PRegExprChar) : boolean;
// one piece because free() must be able to free it all.)
// Beware that the optimization-preparation code in here knows about some
// of the structure of the compiled regexp.
- var
- scan, longest : PRegExprChar;
- len : PtrUInt;
- flags : integer;
- begin
- Result := false; // life too dark
- flags:=0;
+var
+ scan, longest, longestTemp: PRegExprChar;
+ Len, LenTemp: integer;
+ flags: integer;
+begin
+ Result := False; // life too dark
+ flags := 0;
regparse := nil; // for correct error handling
- regexpbeg := exp;
+ regexpBegin := ARegExp;
+ regExactlyLen := nil;
+
+ ClearInternalIndexes;
+ fLastError := reeOk;
+ fLastErrorOpcode := TREOp(0);
+
try
+ if programm <> nil then
+ begin
+ FreeMem(programm);
+ programm := nil;
+ end;
- if programm <> nil then begin
- FreeMem (programm);
- programm := nil;
- end;
-
- if exp = nil then begin
- Error (reeCompNullArgument);
- EXIT;
- end;
-
- fProgModifiers := fModifiers;
- // well, may it's paranoia. I'll check it later... !!!!!!!!
-
- // First pass: determine size, legality.
- fCompModifiers := fModifiers;
- regparse := exp;
- regnpar := 1;
- regsize := 0;
- regcode := @regdummy;
- EmitC (MAGIC);
- if ParseReg (0, flags) = nil
- then EXIT;
-
- // Allocate space.
- GetMem (programm, regsize * SizeOf (REChar));
-
- // Second pass: emit code.
- fCompModifiers := fModifiers;
- regparse := exp;
- regnpar := 1;
- regcode := programm;
- EmitC (MAGIC);
- if ParseReg (0, flags) = nil
- then EXIT;
-
- // Dig out information for optimizations.
- {$IFDEF UseFirstCharSet} //###0.929
- FirstCharSet := [];
- FillFirstCharSet (programm + REOpSz);
- {$ENDIF}
- regstart := #0; // Worst-case defaults.
- reganch := #0;
- regmust := nil;
- regmlen := 0;
- scan := programm + REOpSz; // First BRANCH.
- if PREOp (regnext (scan))^ = EEND then begin // Only one top-level choice.
- scan := scan + REOpSz + RENextOffSz;
-
- // Starting-point info.
- if PREOp (scan)^ = EXACTLY
- then regstart := (scan + REOpSz + RENextOffSz)^
- else if PREOp (scan)^ = BOL
- then inc (reganch);
-
- // If there's something expensive in the r.e., find the longest
- // literal string that must appear and make it the regmust. Resolve
- // ties in favor of later strings, since the regstart check works
- // with the beginning of the r.e. and avoiding duplication
- // strengthens checking. Not a strong reason, but sufficient in the
- // absence of others.
- if (flags and SPSTART) <> 0 then begin
+ if ARegExp = nil then
+ begin
+ Error(reeCompNullArgument);
+ Exit;
+ end;
+
+ fProgModifiers := fModifiers;
+ // well, may it's paranoia. I'll check it later... !!!!!!!!
+
+ // First pass: determine size, legality.
+ fSecondPass := False;
+ fCompModifiers := fModifiers;
+ regparse := ARegExp;
+ regnpar := 1;
+ regsize := 0;
+ regcode := @regdummy;
+ EmitC(OP_MAGIC);
+ if ParseReg(0, flags) = nil then
+ Exit;
+
+ // Allocate space.
+ GetMem(programm, regsize * SizeOf(REChar));
+
+ // Second pass: emit code.
+ fSecondPass := True;
+ fCompModifiers := fModifiers;
+ regparse := ARegExp;
+ regnpar := 1;
+ regcode := programm;
+ EmitC(OP_MAGIC);
+ if ParseReg(0, flags) = nil then
+ Exit;
+
+ // Dig out information for optimizations.
+ {$IFDEF UseFirstCharSet} // ###0.929
+ FirstCharSet := [];
+ FillFirstCharSet(programm + REOpSz);
+ for Len := 0 to 255 do
+ FirstCharArray[Len] := byte(Len) in FirstCharSet;
+ {$ENDIF}
+
+ reganchored := #0;
+ regmust := nil;
+ regmustlen := 0;
+ regmustString := '';
+
+ scan := programm + REOpSz; // First OP_BRANCH.
+ if PREOp(regnext(scan))^ = OP_EEND then
+ begin // Only one top-level choice.
+ scan := scan + REOpSz + RENextOffSz;
+
+ // Starting-point info.
+ if PREOp(scan)^ = OP_BOL then
+ Inc(reganchored);
+
+ // If there's something expensive in the r.e., find the longest
+ // literal string that must appear and make it the regmust. Resolve
+ // ties in favor of later strings, since the regstart check works
+ // with the beginning of the r.e. and avoiding duplication
+ // strengthens checking. Not a strong reason, but sufficient in the
+ // absence of others.
+ if (flags and flag_SpecStart) <> 0 then
+ begin
longest := nil;
- len := 0;
- while scan <> nil do begin
- if (PREOp (scan)^ = EXACTLY)
- and (strlen (scan + REOpSz + RENextOffSz) >= PtrInt(len)) then begin
- longest := scan + REOpSz + RENextOffSz;
- len := strlen (longest);
- end;
- scan := regnext (scan);
- end;
+ Len := 0;
+ while scan <> nil do
+ begin
+ if PREOp(scan)^ = OP_EXACTLY then
+ begin
+ longestTemp := scan + REOpSz + RENextOffSz + RENumberSz;
+ LenTemp := PLongInt(scan + REOpSz + RENextOffSz)^;
+ if LenTemp >= Len then
+ begin
+ longest := longestTemp;
+ Len := LenTemp;
+ end;
+ end;
+ scan := regnext(scan);
+ end;
regmust := longest;
- regmlen := len;
- end;
- end;
-
- Result := true;
-
- finally begin
- if not Result
- then InvalidateProgramm;
- regexpbeg := nil;
- fExprIsCompiled := Result; //###0.944
- end;
+ regmustlen := Len;
+ if regmustlen > 1 then // don't use regmust if too short
+ SetString(regmustString, regmust, regmustlen);
+ end;
+ end;
+
+ Result := True;
+
+ finally
+ begin
+ if not Result then
+ InvalidateProgramm;
+ regexpBegin := nil;
+ regexpIsCompiled := Result; // ###0.944
+ end;
end;
- end; { of function TRegExpr.CompileRegExpr
---------------------------------------------------------------}
+end; { of function TRegExpr.CompileRegExpr
+ -------------------------------------------------------------- }
-procedure TRegExpr.SetUseOsLineEndOnReplace(AValue: Boolean);
+procedure TRegExpr.SetUseOsLineEndOnReplace(AValue: boolean);
begin
- if FUseOsLineEndOnReplace=AValue then Exit;
- FUseOsLineEndOnReplace:=AValue;
+ if FUseOsLineEndOnReplace = AValue then
+ Exit;
+ FUseOsLineEndOnReplace := AValue;
if FUseOsLineEndOnReplace then
- FReplaceLineEnd:=sLineBreak
+ FReplaceLineEnd := sLineBreak
else
- FReplaceLineEnd:=#10;
+ FReplaceLineEnd := #10;
end;
-function TRegExpr.ParseReg (paren : integer; var flagp : integer) : PRegExprChar;
+function TRegExpr.ParseReg(paren: integer; var flagp: integer): PRegExprChar;
// regular expression, i.e. main body or parenthesized thing
// Caller must absorb opening parenthesis.
// Combining parenthesis handling with the base level of regular expression
// is a trifle forced, but the need to tie the tails of the branches to what
// follows makes it hard to avoid.
- var
- ret, br, ender : PRegExprChar;
- parno : integer;
- flags : integer;
- SavedModifiers : integer;
- begin
- flags:=0;
+var
+ ret, br, ender: PRegExprChar;
+ parno: integer;
+ flags: integer;
+ SavedModifiers: TRegExprModifiers;
+begin
+ flags := 0;
Result := nil;
- flagp := HASWIDTH; // Tentatively.
+ flagp := flag_HasWidth; // Tentatively.
parno := 0; // eliminate compiler stupid warning
SavedModifiers := fCompModifiers;
- // Make an OPEN node, if parenthesized.
- if paren <> 0 then begin
- if regnpar >= NSUBEXP then begin
- Error (reeCompParseRegTooManyBrackets);
- EXIT;
- end;
- parno := regnpar;
- inc (regnpar);
- ret := EmitNode (TREOp (ord (OPEN) + parno));
- end
- else ret := nil;
+ // Make an OP_OPEN node, if parenthesized.
+ if paren <> 0 then
+ begin
+ if regnpar >= NSUBEXP then
+ begin
+ Error(reeCompParseRegTooManyBrackets);
+ Exit;
+ end;
+ parno := regnpar;
+ Inc(regnpar);
+ ret := EmitNode(TREOp(Ord(OP_OPEN) + parno));
+ end
+ else
+ ret := nil;
// Pick up the branches, linking them together.
- br := ParseBranch (flags);
- if br = nil then begin
+ br := ParseBranch(flags);
+ if br = nil then
+ begin
Result := nil;
- EXIT;
- end;
- if ret <> nil
- then Tail (ret, br) // OPEN -> first.
- else ret := br;
- if (flags and HASWIDTH) = 0
- then flagp := flagp and not HASWIDTH;
- flagp := flagp or flags and SPSTART;
- while (regparse^ = '|') do begin
- inc (regparse);
- br := ParseBranch (flags);
- if br = nil then begin
- Result := nil;
- EXIT;
- end;
- Tail (ret, br); // BRANCH -> BRANCH.
- if (flags and HASWIDTH) = 0
- then flagp := flagp and not HASWIDTH;
- flagp := flagp or flags and SPSTART;
- end;
+ Exit;
+ end;
+ if ret <> nil then
+ Tail(ret, br) // OP_OPEN -> first.
+ else
+ ret := br;
+ if (flags and flag_HasWidth) = 0 then
+ flagp := flagp and not flag_HasWidth;
+ flagp := flagp or flags and flag_SpecStart;
+ while (regparse^ = '|') do
+ begin
+ Inc(regparse);
+ br := ParseBranch(flags);
+ if br = nil then
+ begin
+ Result := nil;
+ Exit;
+ end;
+ Tail(ret, br); // OP_BRANCH -> OP_BRANCH.
+ if (flags and flag_HasWidth) = 0 then
+ flagp := flagp and not flag_HasWidth;
+ flagp := flagp or flags and flag_SpecStart;
+ end;
// Make a closing node, and hook it on the end.
- if paren <> 0
- then ender := EmitNode (TREOp (ord (CLOSE) + parno))
- else ender := EmitNode (EEND);
- Tail (ret, ender);
+ if paren <> 0 then
+ ender := EmitNode(TREOp(Ord(OP_CLOSE) + parno))
+ else
+ ender := EmitNode(OP_EEND);
+ Tail(ret, ender);
// Hook the tails of the branches to the closing node.
br := ret;
- while br <> nil do begin
- OpTail (br, ender);
- br := regnext (br);
- end;
+ while br <> nil do
+ begin
+ OpTail(br, ender);
+ br := regnext(br);
+ end;
// Check for proper termination.
if paren <> 0 then
- if regparse^ <> ')' then begin
- Error (reeCompParseRegUnmatchedBrackets);
- EXIT;
- end
- else inc (regparse); // skip trailing ')'
- if (paren = 0) and (regparse^ <> #0) then begin
- if regparse^ = ')'
- then Error (reeCompParseRegUnmatchedBrackets2)
- else Error (reeCompParseRegJunkOnEnd);
- EXIT;
- end;
+ if regparse^ <> ')' then
+ begin
+ Error(reeCompParseRegUnmatchedBrackets);
+ Exit;
+ end
+ else
+ Inc(regparse); // skip trailing ')'
+ if (paren = 0) and (regparse < fRegexEnd) then
+ begin
+ if regparse^ = ')' then
+ Error(reeCompParseRegUnmatchedBrackets2)
+ else
+ Error(reeCompParseRegJunkOnEnd);
+ Exit;
+ end;
fCompModifiers := SavedModifiers; // restore modifiers of parent
Result := ret;
- end; { of function TRegExpr.ParseReg
---------------------------------------------------------------}
+end; { of function TRegExpr.ParseReg
+ -------------------------------------------------------------- }
-function TRegExpr.ParseBranch (var flagp : integer) : PRegExprChar;
+function TRegExpr.ParseBranch(var flagp: integer): PRegExprChar;
// one alternative of an | operator
// Implements the concatenation operator.
- var
- ret, chain, latest : PRegExprChar;
- flags : integer;
- begin
- flags:=0;
- flagp := WORST; // Tentatively.
-
- ret := EmitNode (BRANCH);
+var
+ ret, chain, latest: PRegExprChar;
+ flags: integer;
+begin
+ flags := 0;
+ flagp := flag_Worst; // Tentatively.
+
+ ret := EmitNode(OP_BRANCH);
chain := nil;
- while (regparse^ <> #0) and (regparse^ <> '|')
- and (regparse^ <> ')') do begin
- latest := ParsePiece (flags);
- if latest = nil then begin
+ while (regparse < fRegexEnd) and (regparse^ <> '|') and (regparse^ <> ')') do
+ begin
+ latest := ParsePiece(flags);
+ if latest = nil then
+ begin
Result := nil;
- EXIT;
- end;
- flagp := flagp or flags and HASWIDTH;
+ Exit;
+ end;
+ flagp := flagp or flags and flag_HasWidth;
if chain = nil // First piece.
- then flagp := flagp or flags and SPSTART
- else Tail (chain, latest);
+ then
+ flagp := flagp or flags and flag_SpecStart
+ else
+ Tail(chain, latest);
chain := latest;
- end;
+ end;
if chain = nil // Loop ran zero times.
- then EmitNode (NOTHING);
+ then
+ EmitNode(OP_NOTHING);
Result := ret;
- end; { of function TRegExpr.ParseBranch
---------------------------------------------------------------}
+end; { of function TRegExpr.ParseBranch
+ -------------------------------------------------------------- }
-function TRegExpr.ParsePiece (var flagp : integer) : PRegExprChar;
+function TRegExpr.ParsePiece(var flagp: integer): PRegExprChar;
// something followed by possible [*+?{]
// Note that the branching code sequences used for ? and the general cases
-// of * and + and { are somewhat optimized: they use the same NOTHING node as
+// of * and + and { are somewhat optimized: they use the same OP_NOTHING node as
// both the endmarker for their branch list and the body of the last branch.
// It might seem that this node could be dispensed with entirely, but the
// endmarker role is not redundant.
- function parsenum (AStart, AEnd : PRegExprChar) : TREBracesArg;
+
+ function ParseNumber(AStart, AEnd: PRegExprChar): TREBracesArg;
begin
- Result := 0;
- if AEnd - AStart + 1 > 8 then begin // prevent stupid scanning
- Error (reeBRACESArgTooBig);
- EXIT;
+ Result := 0;
+ if AEnd - AStart + 1 > 8 then
+ begin // prevent stupid scanning
+ Error(reeBRACESArgTooBig);
+ Exit;
end;
- while AStart <= AEnd do begin
- Result := Result * 10 + (ord (AStart^) - ord ('0'));
- inc (AStart);
- end;
- if (Result > MaxBracesArg) or (Result < 0) then begin
- Error (reeBRACESArgTooBig);
- EXIT;
+ while AStart <= AEnd do
+ begin
+ Result := Result * 10 + (Ord(AStart^) - Ord('0'));
+ Inc(AStart);
+ end;
+ if (Result > MaxBracesArg) or (Result < 0) then
+ begin
+ Error(reeBRACESArgTooBig);
+ Exit;
end;
end;
- var
- op : REChar;
- NonGreedyOp, NonGreedyCh : boolean; //###0.940
- TheOp : TREOp; //###0.940
- NextNode : PRegExprChar;
- flags : integer;
- BracesMin, Bracesmax : TREBracesArg;
- p, savedparse : PRegExprChar;
-
- procedure EmitComplexBraces (ABracesMin, ABracesMax : TREBracesArg;
- ANonGreedyOp : boolean); //###0.940
+var
+ TheOp: TREOp;
+ NextNode: PRegExprChar;
+
+ procedure EmitComplexBraces(ABracesMin, ABracesMax: TREBracesArg; ANonGreedyOp: boolean); // ###0.940
{$IFDEF ComplexBraces}
var
- off : TRENextOff;
+ off: TRENextOff;
{$ENDIF}
- begin
- {$IFNDEF ComplexBraces}
- Error (reeComplexBracesNotImplemented);
- {$ELSE}
- if ANonGreedyOp
- then TheOp := LOOPNG
- else TheOp := LOOP;
- InsertOperator (LOOPENTRY, Result, REOpSz + RENextOffSz);
- NextNode := EmitNode (TheOp);
- if regcode <> @regdummy then begin
- off := (Result + REOpSz + RENextOffSz)
- - (regcode - REOpSz - RENextOffSz); // back to Atom after LOOPENTRY
- PREBracesArg (AlignToInt(regcode))^ := ABracesMin;
- inc (regcode, REBracesArgSz);
- PREBracesArg (AlignToInt(regcode))^ := ABracesMax;
- inc (regcode, REBracesArgSz);
- PRENextOff (AlignToPtr(regcode))^ := off;
- inc (regcode, RENextOffSz);
+ begin
+ {$IFNDEF ComplexBraces}
+ Error(reeComplexBracesNotImplemented);
+ {$ELSE}
+ if ANonGreedyOp then
+ TheOp := OP_LOOPNG
+ else
+ TheOp := OP_LOOP;
+ InsertOperator(OP_LOOPENTRY, Result, REOpSz + RENextOffSz);
+ NextNode := EmitNode(TheOp);
+ if regcode <> @regdummy then
+ begin
+ off := (Result + REOpSz + RENextOffSz) - (regcode - REOpSz - RENextOffSz);
+ // back to Atom after OP_LOOPENTRY
+ PREBracesArg(AlignToInt(regcode))^ := ABracesMin;
+ Inc(regcode, REBracesArgSz);
+ PREBracesArg(AlignToInt(regcode))^ := ABracesMax;
+ Inc(regcode, REBracesArgSz);
+ PRENextOff(AlignToPtr(regcode))^ := off;
+ Inc(regcode, RENextOffSz);
{$IFDEF DebugSynRegExpr}
- if regcode-programm>regsize then
- raise Exception.Create('TRegExpr.ParsePiece.EmitComplexBraces buffer overrun');
+ if regcode - programm > regsize then
+ raise Exception.Create
+ ('TRegExpr.ParsePiece.EmitComplexBraces buffer overrun');
{$ENDIF}
- end
- else inc (regsize, REBracesArgSz * 2 + RENextOffSz);
- Tail (Result, NextNode); // LOOPENTRY -> LOOP
- if regcode <> @regdummy then
- Tail (Result + REOpSz + RENextOffSz, NextNode); // Atom -> LOOP
- {$ENDIF}
+ end
+ else
+ Inc(regsize, REBracesArgSz * 2 + RENextOffSz);
+ Tail(Result, NextNode); // OP_LOOPENTRY -> OP_LOOP
+ if regcode <> @regdummy then
+ Tail(Result + REOpSz + RENextOffSz, NextNode); // Atom -> OP_LOOP
+ {$ENDIF}
end;
- procedure EmitSimpleBraces (ABracesMin, ABracesMax : TREBracesArg;
- ANonGreedyOp : boolean); //###0.940
+ procedure EmitSimpleBraces(ABracesMin, ABracesMax: TREBracesArg; ANonGreedyOp: boolean); // ###0.940
begin
- if ANonGreedyOp //###0.940
- then TheOp := BRACESNG
- else TheOp := BRACES;
- InsertOperator (TheOp, Result, REOpSz + RENextOffSz + REBracesArgSz * 2);
- if regcode <> @regdummy then begin
- PREBracesArg (AlignToInt(Result + REOpSz + RENextOffSz))^ := ABracesMin;
- PREBracesArg (AlignToInt(Result + REOpSz + RENextOffSz + REBracesArgSz))^ := ABracesMax;
+ if ANonGreedyOp // ###0.940
+ then
+ TheOp := OP_BRACESNG
+ else
+ TheOp := OP_BRACES;
+ InsertOperator(TheOp, Result, REOpSz + RENextOffSz + REBracesArgSz * 2);
+ if regcode <> @regdummy then
+ begin
+ PREBracesArg(AlignToInt(Result + REOpSz + RENextOffSz))^ := ABracesMin;
+ PREBracesArg(AlignToInt(Result + REOpSz + RENextOffSz + REBracesArgSz))^ := ABracesMax;
end;
end;
- begin
- flags:=0;
- Result := ParseAtom (flags);
- if Result = nil
- then EXIT;
+var
+ op: REChar;
+ NonGreedyOp, NonGreedyCh: boolean; // ###0.940
+ flags: integer;
+ BracesMin, Bracesmax: TREBracesArg;
+ p: PRegExprChar;
+begin
+ flags := 0;
+ Result := ParseAtom(flags);
+ if Result = nil then
+ Exit;
op := regparse^;
- if not ((op = '*') or (op = '+') or (op = '?') or (op = '{')) then begin
+ if not ((op = '*') or (op = '+') or (op = '?') or (op = '{')) then
+ begin
flagp := flags;
- EXIT;
- end;
- if ((flags and HASWIDTH) = 0) and (op <> '?') then begin
- Error (reePlusStarOperandCouldBeEmpty);
- EXIT;
- end;
+ Exit;
+ end;
+ if ((flags and flag_HasWidth) = 0) and (op <> '?') then
+ begin
+ Error(reePlusStarOperandCouldBeEmpty);
+ Exit;
+ end;
case op of
- '*': begin
- flagp := WORST or SPSTART;
- NonGreedyCh := (regparse + 1)^ = '?'; //###0.940
- NonGreedyOp := NonGreedyCh or ((fCompModifiers and MaskModG) = 0); //###0.940
- if (flags and SIMPLE) = 0 then begin
- if NonGreedyOp //###0.940
- then EmitComplexBraces (0, MaxBracesArg, NonGreedyOp)
- else begin // Emit x* as (x&|), where & means "self".
- InsertOperator (BRANCH, Result, REOpSz + RENextOffSz); // Either x
- OpTail (Result, EmitNode (BACK)); // and loop
- OpTail (Result, Result); // back
- Tail (Result, EmitNode (BRANCH)); // or
- Tail (Result, EmitNode (NOTHING)); // nil.
- end
+ '*':
+ begin
+ flagp := flag_Worst or flag_SpecStart;
+ NonGreedyCh := (regparse + 1)^ = '?'; // ###0.940
+ NonGreedyOp := NonGreedyCh or not fCompModifiers.G;
+ // ###0.940
+ if (flags and flag_Simple) = 0 then
+ begin
+ if NonGreedyOp // ###0.940
+ then
+ EmitComplexBraces(0, MaxBracesArg, NonGreedyOp)
+ else
+ begin // Emit x* as (x&|), where & means "self".
+ InsertOperator(OP_BRANCH, Result, REOpSz + RENextOffSz); // Either x
+ OpTail(Result, EmitNode(OP_BACK)); // and loop
+ OpTail(Result, Result); // back
+ Tail(Result, EmitNode(OP_BRANCH)); // or
+ Tail(Result, EmitNode(OP_NOTHING)); // nil.
+ end
end
- else begin // Simple
- if NonGreedyOp //###0.940
- then TheOp := STARNG
- else TheOp := STAR;
- InsertOperator (TheOp, Result, REOpSz + RENextOffSz);
+ else
+ begin // Simple
+ if NonGreedyOp // ###0.940
+ then
+ TheOp := OP_STARNG
+ else
+ TheOp := OP_STAR;
+ InsertOperator(TheOp, Result, REOpSz + RENextOffSz);
end;
- if NonGreedyCh //###0.940
- then inc (regparse); // Skip extra char ('?')
- end; { of case '*'}
- '+': begin
- flagp := WORST or SPSTART or HASWIDTH;
- NonGreedyCh := (regparse + 1)^ = '?'; //###0.940
- NonGreedyOp := NonGreedyCh or ((fCompModifiers and MaskModG) = 0); //###0.940
- if (flags and SIMPLE) = 0 then begin
- if NonGreedyOp //###0.940
- then EmitComplexBraces (1, MaxBracesArg, NonGreedyOp)
- else begin // Emit x+ as x(&|), where & means "self".
- NextNode := EmitNode (BRANCH); // Either
- Tail (Result, NextNode);
- Tail (EmitNode (BACK), Result); // loop back
- Tail (NextNode, EmitNode (BRANCH)); // or
- Tail (Result, EmitNode (NOTHING)); // nil.
- end
+ if NonGreedyCh // ###0.940
+ then
+ Inc(regparse); // Skip extra char ('?')
+ end; { of case '*' }
+ '+':
+ begin
+ flagp := flag_Worst or flag_SpecStart or flag_HasWidth;
+ NonGreedyCh := (regparse + 1)^ = '?'; // ###0.940
+ NonGreedyOp := NonGreedyCh or not fCompModifiers.G;
+ // ###0.940
+ if (flags and flag_Simple) = 0 then
+ begin
+ if NonGreedyOp // ###0.940
+ then
+ EmitComplexBraces(1, MaxBracesArg, NonGreedyOp)
+ else
+ begin // Emit x+ as x(&|), where & means "self".
+ NextNode := EmitNode(OP_BRANCH); // Either
+ Tail(Result, NextNode);
+ Tail(EmitNode(OP_BACK), Result); // loop back
+ Tail(NextNode, EmitNode(OP_BRANCH)); // or
+ Tail(Result, EmitNode(OP_NOTHING)); // nil.
+ end
end
- else begin // Simple
- if NonGreedyOp //###0.940
- then TheOp := PLUSNG
- else TheOp := PLUS;
- InsertOperator (TheOp, Result, REOpSz + RENextOffSz);
+ else
+ begin // Simple
+ if NonGreedyOp // ###0.940
+ then
+ TheOp := OP_PLUSNG
+ else
+ TheOp := OP_PLUS;
+ InsertOperator(TheOp, Result, REOpSz + RENextOffSz);
end;
- if NonGreedyCh //###0.940
- then inc (regparse); // Skip extra char ('?')
- end; { of case '+'}
- '?': begin
- flagp := WORST;
- NonGreedyCh := (regparse + 1)^ = '?'; //###0.940
- NonGreedyOp := NonGreedyCh or ((fCompModifiers and MaskModG) = 0); //###0.940
- if NonGreedyOp then begin //###0.940 // We emit x?? as x{0,1}?
- if (flags and SIMPLE) = 0
- then EmitComplexBraces (0, 1, NonGreedyOp)
- else EmitSimpleBraces (0, 1, NonGreedyOp);
+ if NonGreedyCh // ###0.940
+ then
+ Inc(regparse); // Skip extra char ('?')
+ end; { of case '+' }
+ '?':
+ begin
+ flagp := flag_Worst;
+ NonGreedyCh := (regparse + 1)^ = '?'; // ###0.940
+ NonGreedyOp := NonGreedyCh or not fCompModifiers.G;
+ // ###0.940
+ if NonGreedyOp then
+ begin // ###0.940 // We emit x?? as x{0,1}?
+ if (flags and flag_Simple) = 0 then
+ EmitComplexBraces(0, 1, NonGreedyOp)
+ else
+ EmitSimpleBraces(0, 1, NonGreedyOp);
end
- else begin // greedy '?'
- InsertOperator (BRANCH, Result, REOpSz + RENextOffSz); // Either x
- Tail (Result, EmitNode (BRANCH)); // or
- NextNode := EmitNode (NOTHING); // nil.
- Tail (Result, NextNode);
- OpTail (Result, NextNode);
+ else
+ begin // greedy '?'
+ InsertOperator(OP_BRANCH, Result, REOpSz + RENextOffSz); // Either x
+ Tail(Result, EmitNode(OP_BRANCH)); // or
+ NextNode := EmitNode(OP_NOTHING); // nil.
+ Tail(Result, NextNode);
+ OpTail(Result, NextNode);
end;
- if NonGreedyCh //###0.940
- then inc (regparse); // Skip extra char ('?')
- end; { of case '?'}
- '{': begin
- savedparse := regparse;
- // !!!!!!!!!!!!
- // Filip Jirsak's note - what will happen, when we are at the end of regparse?
- inc (regparse);
- p := regparse;
- while Pos (regparse^, '0123456789') > 0 // <min> MUST appear
- do inc (regparse);
- if (regparse^ <> '}') and (regparse^ <> ',') or (p = regparse) then begin
- regparse := savedparse;
- flagp := flags;
- EXIT;
- end;
- BracesMin := parsenum (p, regparse - 1);
- if regparse^ = ',' then begin
- inc (regparse);
- p := regparse;
- while Pos (regparse^, '0123456789') > 0
- do inc (regparse);
- if regparse^ <> '}' then begin
- regparse := savedparse;
- EXIT;
+ if NonGreedyCh // ###0.940
+ then
+ Inc(regparse); // Skip extra char ('?')
+ end; { of case '?' }
+ '{':
+ begin
+ Inc(regparse);
+ p := regparse;
+ while IsDigitChar(regparse^) do // <min> MUST appear
+ Inc(regparse);
+ if (regparse^ <> '}') and (regparse^ <> ',') or (p = regparse) then
+ begin
+ Error(reeIncorrectBraces);
+ Exit;
+ end;
+ BracesMin := ParseNumber(p, regparse - 1);
+ if regparse^ = ',' then
+ begin
+ Inc(regparse);
+ p := regparse;
+ while IsDigitChar(regparse^) do
+ Inc(regparse);
+ if regparse^ <> '}' then
+ begin
+ Error(reeIncorrectBraces);
+ Exit;
end;
- if p = regparse
- then BracesMax := MaxBracesArg
- else BracesMax := parsenum (p, regparse - 1);
+ if p = regparse then
+ Bracesmax := MaxBracesArg
+ else
+ Bracesmax := ParseNumber(p, regparse - 1);
end
- else BracesMax := BracesMin; // {n} == {n,n}
- if BracesMin > BracesMax then begin
- Error (reeBracesMinParamGreaterMax);
- EXIT;
- end;
- if BracesMin > 0
- then flagp := WORST;
- if BracesMax > 0
- then flagp := flagp or HASWIDTH or SPSTART;
-
- NonGreedyCh := (regparse + 1)^ = '?'; //###0.940
- NonGreedyOp := NonGreedyCh or ((fCompModifiers and MaskModG) = 0); //###0.940
- if (flags and SIMPLE) <> 0
- then EmitSimpleBraces (BracesMin, BracesMax, NonGreedyOp)
- else EmitComplexBraces (BracesMin, BracesMax, NonGreedyOp);
- if NonGreedyCh //###0.940
- then inc (regparse); // Skip extra char '?'
- end; // of case '{'
-// else // here we can't be
- end; { of case op}
-
- inc (regparse);
- if (regparse^ = '*') or (regparse^ = '+') or (regparse^ = '?') or (regparse^ = '{') then begin
- Error (reeNestedSQP);
- EXIT;
- end;
- end; { of function TRegExpr.ParsePiece
---------------------------------------------------------------}
-
-function TRegExpr.HexDig (ch : REChar) : PtrInt;
+ else
+ Bracesmax := BracesMin; // {n} == {n,n}
+ if BracesMin > Bracesmax then
+ begin
+ Error(reeBracesMinParamGreaterMax);
+ Exit;
+ end;
+ if BracesMin > 0 then
+ flagp := flag_Worst;
+ if Bracesmax > 0 then
+ flagp := flagp or flag_HasWidth or flag_SpecStart;
+
+ NonGreedyCh := (regparse + 1)^ = '?'; // ###0.940
+ NonGreedyOp := NonGreedyCh or not fCompModifiers.G;
+ // ###0.940
+ if (flags and flag_Simple) <> 0 then
+ EmitSimpleBraces(BracesMin, Bracesmax, NonGreedyOp)
+ else
+ EmitComplexBraces(BracesMin, Bracesmax, NonGreedyOp);
+ if NonGreedyCh // ###0.940
+ then
+ Inc(regparse); // Skip extra char '?'
+ end; // of case '{'
+ // else // here we can't be
+ end; { of case op }
+
+ Inc(regparse);
+ op := regparse^;
+ if (op = '*') or (op = '+') or (op = '?') or (op = '{') then
+ Error(reeNestedSQP);
+end; { of function TRegExpr.ParsePiece
+ -------------------------------------------------------------- }
- begin
- Result := Ord(Ch);
- Case Result of
- Ord('A')..Ord('F') : Result:=10+Result-Ord('A');
- Ord('a')..Ord('f') : Result:=10+Result-Ord('a');
- Ord('0')..Ord('9') : Result:=Result-Ord('0');
- else
- Error (reeBadHexDigit);
+function TRegExpr.HexDig(Ch: REChar): integer;
+begin
+ case Ch of
+ '0' .. '9':
+ Result := Ord(Ch) - Ord('0');
+ 'a' .. 'f':
+ Result := Ord(Ch) - Ord('a') + 10;
+ 'A' .. 'F':
+ Result := Ord(Ch) - Ord('A') + 10;
+ else
+ begin
+ Result := 0;
+ Error(reeBadHexDigit);
+ end;
end;
- end;
+end;
-function TRegExpr.UnQuoteChar (var APtr : PRegExprChar) : REChar;
- begin
+function TRegExpr.UnQuoteChar(var APtr: PRegExprChar): REChar;
+var
+ Ch: REChar;
+begin
case APtr^ of
- 't': Result := #$9; // \t => tab (HT/TAB)
- 'n': Result := #$a; // \n => newline (NL)
- 'r': Result := #$d; // \r => carriage return (CR)
- 'f': Result := #$c; // \f => form feed (FF)
- 'a': Result := #$7; // \a => alarm (bell) (BEL)
- 'e': Result := #$1b; // \e => escape (ESC)
- 'x': begin // \x: hex char
- Result := #0;
- inc (APtr);
- if APtr^ = #0 then begin
- Error (reeNoHexCodeAfterBSlashX);
- EXIT;
- end;
- if APtr^ = '{' then begin // \x{nnnn} //###0.936
- REPEAT
- inc (APtr);
- if APtr^ = #0 then begin
- Error (reeNoHexCodeAfterBSlashX);
- EXIT;
- end;
- if APtr^ <> '}' then begin
- if (Ord (Result)
- ShR (SizeOf (REChar) * 8 - 4)) and $F <> 0 then begin
- Error (reeHexCodeAfterBSlashXTooBig);
- EXIT;
+ 't':
+ Result := #$9; // \t => tab (HT/TAB)
+ 'n':
+ Result := #$a; // \n => newline (NL)
+ 'r':
+ Result := #$d; // \r => carriage return (CR)
+ 'f':
+ Result := #$c; // \f => form feed (FF)
+ 'a':
+ Result := #$7; // \a => alarm (bell) (BEL)
+ 'e':
+ Result := #$1b; // \e => escape (ESC)
+ 'c':
+ begin // \cK => code for Ctrl+K
+ Inc(APtr);
+ if APtr >= fRegexEnd then
+ Error(reeNoLetterAfterBSlashC);
+ Ch := APtr^;
+ case Ch of
+ 'a' .. 'z':
+ Result := REChar(Ord(Ch) - Ord('a') + 1);
+ 'A' .. 'Z':
+ Result := REChar(Ord(Ch) - Ord('A') + 1);
+ else
+ Error(reeNoLetterAfterBSlashC);
+ end;
+ end;
+ 'x':
+ begin // \x: hex char
+ Result := #0;
+ Inc(APtr);
+ if APtr >= fRegexEnd then
+ begin
+ Error(reeNoHexCodeAfterBSlashX);
+ Exit;
+ end;
+ if APtr^ = '{' then
+ begin // \x{nnnn} //###0.936
+ repeat
+ Inc(APtr);
+ if APtr >= fRegexEnd then
+ begin
+ Error(reeNoHexCodeAfterBSlashX);
+ Exit;
+ end;
+ if APtr^ <> '}' then
+ begin
+ if (Ord(Result) ShR (SizeOf(REChar) * 8 - 4)) and $F <> 0 then
+ begin
+ Error(reeHexCodeAfterBSlashXTooBig);
+ Exit;
end;
- Result := REChar ((Ord (Result) ShL 4) or HexDig (APtr^));
- // HexDig will cause Error if bad hex digit found
+ Result := REChar((Ord(Result) ShL 4) or HexDig(APtr^));
+ // HexDig will cause Error if bad hex digit found
end
- else BREAK;
- UNTIL False;
+ else
+ Break;
+ until False;
end
- else begin
- Result := REChar (HexDig (APtr^));
- // HexDig will cause Error if bad hex digit found
- inc (APtr);
- if APtr^ = #0 then begin
- Error (reeNoHexCodeAfterBSlashX);
- EXIT;
+ else
+ begin
+ Result := REChar(HexDig(APtr^));
+ // HexDig will cause Error if bad hex digit found
+ Inc(APtr);
+ if APtr >= fRegexEnd then
+ begin
+ Error(reeNoHexCodeAfterBSlashX);
+ Exit;
end;
- Result := REChar ((Ord (Result) ShL 4) or HexDig (APtr^));
- // HexDig will cause Error if bad hex digit found
+ Result := REChar((Ord(Result) ShL 4) or HexDig(APtr^));
+ // HexDig will cause Error if bad hex digit found
end;
- end;
- else Result := APtr^;
- end;
- end;
-
+ end;
+ else
+ Result := APtr^;
+ end;
+end;
-function TRegExpr.ParseAtom (var flagp : integer) : PRegExprChar;
+function TRegExpr.ParseAtom(var flagp: integer): PRegExprChar;
// the lowest level
// Optimization: gobbles an entire sequence of ordinary characters so that
// it can turn them into a single node, which is smaller to store and
// faster to run. Backslashed characters are exceptions, each becoming a
// separate node; the code is simpler that way and it's not worth fixing.
- var
- ret : PRegExprChar;
- flags : integer;
- RangeBeg, RangeEnd : REChar;
- CanBeRange : boolean;
- len : PtrInt;
- ender : REChar;
- begmodfs : PRegExprChar;
-
- {$IFDEF UseSetOfChar} //###0.930
- RangePCodeBeg : PRegExprChar;
- RangePCodeIdx : PtrInt;
- RangeIsCI : boolean;
- RangeSet : TSetOfREChar;
- RangeLen : PtrInt;
- RangeChMin, RangeChMax : REChar;
- {$ENDIF}
-
- procedure EmitExactly (ch : REChar);
- begin
- if (fCompModifiers and MaskModI) <> 0
- then ret := EmitNode (EXACTLYCI)
- else ret := EmitNode (EXACTLY);
- EmitC (ch);
- EmitC (#0);
- flagp := flagp or HASWIDTH or SIMPLE;
- end;
+var
+ ret: PRegExprChar;
+ RangeBeg, RangeEnd: REChar;
+ CanBeRange: boolean;
+ AddrOfLen: PLongInt;
- procedure EmitStr (const s : RegExprString);
- var i : PtrInt;
+ procedure EmitExactly(Ch: REChar); {$IFDEF InlineFuncs}inline;{$ENDIF}
begin
- for i := 1 to length (s)
- do EmitC (s [i]);
+ if fCompModifiers.I then
+ ret := EmitNode(OP_EXACTLYCI)
+ else
+ ret := EmitNode(OP_EXACTLY);
+ EmitInt(1);
+ EmitC(Ch);
+ flagp := flagp or flag_HasWidth or flag_Simple;
end;
- function EmitRange (AOpCode : REChar) : PRegExprChar;
+ procedure EmitRangeChar(Ch: REChar; AStartOfRange: boolean); {$IFDEF InlineFuncs}inline;{$ENDIF}
begin
- {$IFDEF UseSetOfChar}
- case AOpCode of
- ANYBUTCI, ANYBUT:
- Result := EmitNode (ANYBUTTINYSET);
- else // ANYOFCI, ANYOF
- Result := EmitNode (ANYOFTINYSET);
- end;
- case AOpCode of
- ANYBUTCI, ANYOFCI:
- RangeIsCI := True;
- else // ANYBUT, ANYOF
- RangeIsCI := False;
+ CanBeRange := AStartOfRange;
+ if fCompModifiers.I then
+ Ch := _UpperCase(Ch);
+ if AStartOfRange then
+ begin
+ AddrOfLen := nil;
+ RangeBeg := Ch;
+ end
+ else
+ begin
+ if AddrOfLen = nil then
+ begin
+ EmitC(OpKind_Char);
+ Pointer(AddrOfLen) := regcode;
+ EmitInt(0);
+ end;
+ Inc(AddrOfLen^);
+ EmitC(Ch);
end;
- RangePCodeBeg := regcode;
- RangePCodeIdx := regsize;
- RangeLen := 0;
- RangeSet := [];
- RangeChMin := #255;
- RangeChMax := #0;
- {$ELSE}
- Result := EmitNode (AOpCode);
- // ToDo:
- // !!!!!!!!!!!!! Implement ANYOF[BUT]TINYSET generation for UniCode !!!!!!!!!!
- {$ENDIF}
- end;
-
-{$IFDEF UseSetOfChar}
- procedure EmitRangeCPrim (b : REChar); //###0.930
- begin
- if b in RangeSet
- then EXIT;
- inc (RangeLen);
- if b < RangeChMin
- then RangeChMin := b;
- if b > RangeChMax
- then RangeChMax := b;
- Include (RangeSet, b);
end;
- {$ENDIF}
- procedure EmitRangeC (b : REChar);
- {$IFDEF UseSetOfChar}
+ procedure EmitRangePacked(ch1, ch2: REChar); {$IFDEF InlineFuncs}inline;{$ENDIF}
var
- Ch : REChar;
- {$ENDIF}
+ ChkIndex: integer;
begin
- CanBeRange := false;
- {$IFDEF UseSetOfChar}
- if b <> #0 then begin
- EmitRangeCPrim (b); //###0.930
- if RangeIsCI
- then EmitRangeCPrim (InvertCase (b)); //###0.930
- end
- else begin
- {$IFDEF UseAsserts}
- Assert (RangeLen > 0, 'TRegExpr.ParseAtom(subroutine EmitRangeC): empty range'); // impossible, but who knows..
- Assert (RangeChMin <= RangeChMax, 'TRegExpr.ParseAtom(subroutine EmitRangeC): RangeChMin > RangeChMax'); // impossible, but who knows..
- {$ENDIF}
- if RangeLen <= TinySetLen then begin // emit "tiny set"
- if regcode = @regdummy then begin
- regsize := RangePCodeIdx + TinySetLen; // RangeChMin/Max !!!
- EXIT;
- end;
- regcode := RangePCodeBeg;
- for Ch := RangeChMin to RangeChMax do //###0.930
- if Ch in RangeSet then begin
- regcode^ := Ch;
- inc (regcode);
- end;
- // fill rest:
- while regcode < RangePCodeBeg + TinySetLen do begin
- regcode^ := RangeChMax;
- inc (regcode);
- end;
- {$IFDEF DebugSynRegExpr}
- if regcode-programm>regsize then
- raise Exception.Create('TRegExpr.ParseAtom.EmitRangeC TinySetLen buffer overrun');
- {$ENDIF}
- end
- else begin
- if regcode = @regdummy then begin
- regsize := RangePCodeIdx + SizeOf (TSetOfREChar);
- EXIT;
- end;
- if (RangePCodeBeg - REOpSz - RENextOffSz)^ = ANYBUTTINYSET
- then RangeSet := [#0 .. #255] - RangeSet;
- PREOp (RangePCodeBeg - REOpSz - RENextOffSz)^ := ANYOFFULLSET;
- regcode := RangePCodeBeg;
- Move (RangeSet, regcode^, SizeOf (TSetOfREChar));
- inc (regcode, SizeOf (TSetOfREChar));
- {$IFDEF DebugSynRegExpr}
- if regcode-programm>regsize then
- raise Exception.Create('TRegExpr.ParseAtom.EmitRangeC non TinySetLen buffer overrun');
- {$ENDIF}
- end;
- end;
- {$ELSE}
- EmitC (b);
- {$ENDIF}
- end;
+ AddrOfLen := nil;
+ CanBeRange := False;
- procedure EmitSimpleRangeC (b : REChar);
- begin
- RangeBeg := b;
- EmitRangeC (b);
- CanBeRange := true;
- end;
+ if fCompModifiers.I then
+ begin
+ ch1 := _UpperCase(ch1);
+ ch2 := _UpperCase(ch2);
+ end;
- procedure EmitRangeStr (const s : RegExprString);
- var i : PtrInt;
- begin
- for i := 1 to length (s)
- do EmitRangeC (s [i]);
+ for ChkIndex := Low(CharCheckerInfos) to High(CharCheckerInfos) do
+ if (CharCheckerInfos[ChkIndex].CharBegin = ch1) and
+ (CharCheckerInfos[ChkIndex].CharEnd = ch2) then
+ begin
+ EmitC(OpKind_MetaClass);
+ EmitC(REChar(CharCheckerInfos[ChkIndex].CheckerIndex));
+ Exit;
+ end;
+
+ EmitC(OpKind_Range);
+ EmitC(ch1);
+ EmitC(ch2);
end;
- begin
+var
+ flags: integer;
+ Len: integer;
+ SavedPtr: PRegExprChar;
+ EnderChar, TempChar: REChar;
+begin
Result := nil;
- flags:=0;
- flagp := WORST; // Tentatively.
+ flags := 0;
+ flagp := flag_Worst;
+ AddrOfLen := nil;
- inc (regparse);
+ Inc(regparse);
case (regparse - 1)^ of
- '^': if ((fCompModifiers and MaskModM) = 0)
- or ((fLineSeparators = '') and not fLinePairedSeparatorAssigned)
- then ret := EmitNode (BOL)
- else ret := EmitNode (BOLML);
- '$': if ((fCompModifiers and MaskModM) = 0)
- or ((fLineSeparators = '') and not fLinePairedSeparatorAssigned)
- then ret := EmitNode (EOL)
- else ret := EmitNode (EOLML);
+ '^':
+ if not fCompModifiers.M or
+ ((fLineSeparators = '') and not fLinePairedSeparatorAssigned) then
+ ret := EmitNode(OP_BOL)
+ else
+ ret := EmitNode(OP_BOLML);
+ '$':
+ if not fCompModifiers.M or
+ ((fLineSeparators = '') and not fLinePairedSeparatorAssigned) then
+ ret := EmitNode(OP_EOL)
+ else
+ ret := EmitNode(OP_EOLML);
'.':
- if (fCompModifiers and MaskModS) <> 0 then begin
- ret := EmitNode (ANY);
- flagp := flagp or HASWIDTH or SIMPLE;
- end
- else begin // not /s, so emit [^:LineSeparators:]
- ret := EmitNode (ANYML);
- flagp := flagp or HASWIDTH; // not so simple ;)
-// ret := EmitRange (ANYBUT);
-// EmitRangeStr (LineSeparators); //###0.941
-// EmitRangeStr (LinePairedSeparator); // !!! isn't correct if have to accept only paired
-// EmitRangeC (#0);
-// flagp := flagp or HASWIDTH or SIMPLE;
- end;
- '[': begin
- if regparse^ = '^' then begin // Complement of range.
- if (fCompModifiers and MaskModI) <> 0
- then ret := EmitRange (ANYBUTCI)
- else ret := EmitRange (ANYBUT);
- inc (regparse);
- end
- else
- if (fCompModifiers and MaskModI) <> 0
- then ret := EmitRange (ANYOFCI)
- else ret := EmitRange (ANYOF);
-
- CanBeRange := false;
-
- if (regparse^ = ']') then begin
- EmitSimpleRangeC (regparse^); // []-a] -> ']' .. 'a'
- inc (regparse);
- end;
-
- while (regparse^ <> #0) and (regparse^ <> ']') do begin
- if (regparse^ = '-')
- and ((regparse + 1)^ <> #0) and ((regparse + 1)^ <> ']')
- and CanBeRange then begin
- inc (regparse);
- RangeEnd := regparse^;
- if RangeEnd = EscChar then begin
- {$IFDEF UniCode} //###0.935
- if (ord ((regparse + 1)^) < 256)
- and (char ((regparse + 1)^)
- in ['d', 'D', 's', 'S', 'w', 'W']) then begin
- {$ELSE}
- if (regparse + 1)^ in ['d', 'D', 's', 'S', 'w', 'W'] then begin
- {$ENDIF}
- EmitRangeC ('-'); // or treat as error ?!!
- CONTINUE;
- end;
- inc (regparse);
- RangeEnd := UnQuoteChar (regparse);
+ if fCompModifiers.S then
+ begin
+ ret := EmitNode(OP_ANY);
+ flagp := flagp or flag_HasWidth or flag_Simple;
+ end
+ else
+ begin // not /s, so emit [^:LineSeparators:]
+ ret := EmitNode(OP_ANYML);
+ flagp := flagp or flag_HasWidth; // not so simple ;)
+ end;
+ '[':
+ begin
+ if regparse^ = '^' then
+ begin // Complement of range.
+ if fCompModifiers.I then
+ ret := EmitNode(OP_ANYBUTCI)
+ else
+ ret := EmitNode(OP_ANYBUT);
+ Inc(regparse);
+ end
+ else if fCompModifiers.I then
+ ret := EmitNode(OP_ANYOFCI)
+ else
+ ret := EmitNode(OP_ANYOF);
+
+ CanBeRange := False;
+
+ if regparse^ = ']' then
+ begin
+ // first ']' inside [] treated as simple char, no need to check '['
+ EmitRangeChar(regparse^, (regparse + 1)^ = '-');
+ Inc(regparse);
+ end;
+
+ while (regparse < fRegexEnd) and (regparse^ <> ']') do
+ begin
+ if (regparse^ = '-') and ((regparse + 1) < fRegexEnd) and
+ ((regparse + 1)^ <> ']') and CanBeRange then
+ begin
+ Inc(regparse);
+ RangeEnd := regparse^;
+ if RangeEnd = EscChar then
+ begin
+ if _IsMetaChar((regparse + 1)^) then
+ begin
+ Error(reeMetaCharAfterMinusInRange);
+ Exit;
end;
+ Inc(regparse);
+ RangeEnd := UnQuoteChar(regparse);
+ end;
- // r.e.ranges extension for russian
- if ((fCompModifiers and MaskModR) <> 0)
- and (RangeBeg = RusRangeLoLow) and (RangeEnd = RusRangeLoHigh) then begin
- EmitRangeStr (RusRangeLo);
- end
- else if ((fCompModifiers and MaskModR) <> 0)
- and (RangeBeg = RusRangeHiLow) and (RangeEnd = RusRangeHiHigh) then begin
- EmitRangeStr (RusRangeHi);
- end
- else if ((fCompModifiers and MaskModR) <> 0)
- and (RangeBeg = RusRangeLoLow) and (RangeEnd = RusRangeHiHigh) then begin
- EmitRangeStr (RusRangeLo);
- EmitRangeStr (RusRangeHi);
- end
- else begin // standard r.e. handling
- if RangeBeg > RangeEnd then begin
- Error (reeInvalidRange);
- EXIT;
- end;
- inc (RangeBeg);
- EmitRangeC (RangeEnd); // prevent infinite loop if RangeEnd=$ff
- while RangeBeg < RangeEnd do begin //###0.929
- EmitRangeC (RangeBeg);
- inc (RangeBeg);
+ // special handling for Russian range a-YA, add 2 ranges: a-ya and A-YA
+ if fCompModifiers.R and
+ (RangeBeg = RusRangeLoLow) and (RangeEnd = RusRangeHiHigh) then
+ begin
+ EmitRangePacked(RusRangeLoLow, RusRangeLoHigh);
+ EmitRangePacked(RusRangeHiLow, RusRangeHiHigh);
+ end
+ else
+ begin // standard r.e. handling
+ if RangeBeg > RangeEnd then
+ begin
+ Error(reeInvalidRange);
+ Exit;
+ end;
+ EmitRangePacked(RangeBeg, RangeEnd);
+ end;
+ Inc(regparse);
+ end
+ else
+ begin
+ if regparse^ = EscChar then
+ begin
+ Inc(regparse);
+ if regparse >= fRegexEnd then
+ begin
+ Error(reeParseAtomTrailingBackSlash);
+ Exit;
+ end;
+ if _IsMetaChar(regparse^) then
+ begin
+ AddrOfLen := nil;
+ CanBeRange := False;
+ EmitC(OpKind_MetaClass);
+ case regparse^ of
+ 'w':
+ EmitC(REChar(CheckerIndex_Word));
+ 'W':
+ EmitC(REChar(CheckerIndex_NotWord));
+ 's':
+ EmitC(REChar(CheckerIndex_Space));
+ 'S':
+ EmitC(REChar(CheckerIndex_NotSpace));
+ 'd':
+ EmitC(REChar(CheckerIndex_Digit));
+ 'D':
+ EmitC(REChar(CheckerIndex_NotDigit));
+ 'v':
+ EmitC(REChar(CheckerIndex_VertSep));
+ 'V':
+ EmitC(REChar(CheckerIndex_NotVertSep));
+ 'h':
+ EmitC(REChar(CheckerIndex_HorzSep));
+ 'H':
+ EmitC(REChar(CheckerIndex_NotHorzSep));
+ else
+ Error(reeBadOpcodeInCharClass);
end;
+ end
+ else
+ begin
+ TempChar := UnQuoteChar(regparse);
+ EmitRangeChar(TempChar, (regparse + 1)^ = '-');
end;
- inc (regparse);
end
- else begin
- if regparse^ = EscChar then begin
- inc (regparse);
- if regparse^ = #0 then begin
- Error (reeParseAtomTrailingBackSlash);
- EXIT;
- end;
- case regparse^ of // r.e.extensions
- 'd': EmitRangeStr ('0123456789');
- 'w': EmitRangeStr (WordChars);
- 's': EmitRangeStr (SpaceChars);
- else EmitSimpleRangeC (UnQuoteChar (regparse));
- end; { of case}
- end
- else EmitSimpleRangeC (regparse^);
- inc (regparse);
+ else
+ begin
+ EmitRangeChar(regparse^, (regparse + 1)^ = '-');
end;
- end; { of while}
- EmitRangeC (#0);
- if regparse^ <> ']' then begin
- Error (reeUnmatchedSqBrackets);
- EXIT;
- end;
- inc (regparse);
- flagp := flagp or HASWIDTH or SIMPLE;
+ Inc(regparse);
+ end;
+ end; { of while }
+ AddrOfLen := nil;
+ CanBeRange := False;
+ EmitC(OpKind_End);
+ if regparse^ <> ']' then
+ begin
+ Error(reeUnmatchedSqBrackets);
+ Exit;
+ end;
+ Inc(regparse);
+ flagp := flagp or flag_HasWidth or flag_Simple;
end;
- '(': begin
- if regparse^ = '?' then begin
- // check for extended Perl syntax : (?..)
- if (regparse + 1)^ = '#' then begin // (?#comment)
- inc (regparse, 2); // find closing ')'
- while (regparse^ <> #0) and (regparse^ <> ')')
- do inc (regparse);
- if regparse^ <> ')' then begin
- Error (reeUnclosedComment);
- EXIT;
- end;
- inc (regparse); // skip ')'
- ret := EmitNode (COMMENT); // comment
- end
- else begin // modifiers ?
- inc (regparse); // skip '?'
- begmodfs := regparse;
- while (regparse^ <> #0) and (regparse^ <> ')')
- do inc (regparse);
- if (regparse^ <> ')')
- or not ParseModifiersStr (copy (begmodfs, 1, (regparse - begmodfs)), fCompModifiers) then begin
- Error (reeUrecognizedModifier);
- EXIT;
- end;
- inc (regparse); // skip ')'
- ret := EmitNode (COMMENT); // comment
-// Error (reeQPSBFollowsNothing);
-// EXIT;
+ '(':
+ begin
+ if regparse^ = '?' then
+ begin
+ // check for non-capturing group: (?:text)
+ if (regparse + 1)^ = ':' then
+ begin
+ Inc(regparse, 2);
+ ret := ParseReg(1, flags);
+ if ret = nil then
+ begin
+ Result := nil;
+ Exit;
end;
+ flagp := flagp or flags and (flag_HasWidth or flag_SpecStart);
end
- else begin
- ret := ParseReg (1, flags);
- if ret = nil then begin
- Result := nil;
- EXIT;
+ else
+ // check for extended Perl syntax : (?..)
+ if (regparse + 1)^ = '#' then
+ begin // (?#comment)
+ Inc(regparse, 2); // find closing ')'
+ while (regparse < fRegexEnd) and (regparse^ <> ')') do
+ Inc(regparse);
+ if regparse^ <> ')' then
+ begin
+ Error(reeUnclosedComment);
+ Exit;
+ end;
+ Inc(regparse); // skip ')'
+ ret := EmitNode(OP_COMMENT); // comment
+ end
+ else
+ begin // modifiers ?
+ Inc(regparse); // skip '?'
+ SavedPtr := regparse;
+ while (regparse < fRegexEnd) and (regparse^ <> ')') do
+ Inc(regparse);
+ if (regparse^ <> ')') or
+ not ParseModifiers(SavedPtr, regparse - SavedPtr, fCompModifiers) then
+ begin
+ Error(reeUnrecognizedModifier);
+ Exit;
+ end;
+ Inc(regparse); // skip ')'
+ ret := EmitNode(OP_COMMENT); // comment
+ // Error (reeQPSBFollowsNothing);
+ // Exit;
end;
- flagp := flagp or flags and (HASWIDTH or SPSTART);
+ end
+ else
+ begin
+ // normal (capturing) group
+ if fSecondPass then
+ // must skip this block for one of passes, to not double groups count
+ if GrpCount < NSUBEXP - 1 then
+ begin
+ Inc(GrpCount);
+ GrpIndexes[GrpCount] := regnpar;
+ end;
+ ret := ParseReg(1, flags);
+ if ret = nil then
+ begin
+ Result := nil;
+ Exit;
end;
+ flagp := flagp or flags and (flag_HasWidth or flag_SpecStart);
+ end;
end;
- #0, '|', ')': begin // Supposed to be caught earlier.
- Error (reeInternalUrp);
- EXIT;
+ '|', ')':
+ begin // Supposed to be caught earlier.
+ Error(reeInternalUrp);
+ Exit;
end;
- '?', '+', '*': begin
- Error (reeQPSBFollowsNothing);
- EXIT;
+ '?', '+', '*':
+ begin
+ Error(reeQPSBFollowsNothing);
+ Exit;
end;
- EscChar: begin
- if regparse^ = #0 then begin
- Error (reeTrailingBackSlash);
- EXIT;
- end;
+ EscChar:
+ begin
+ if regparse >= fRegexEnd then
+ begin
+ Error(reeTrailingBackSlash);
+ Exit;
+ end;
case regparse^ of // r.e.extensions
- 'b': ret := EmitNode (BOUND); //###0.943
- 'B': ret := EmitNode (NOTBOUND); //###0.943
- 'A': ret := EmitNode (BOL); //###0.941
- 'Z': ret := EmitNode (EOL); //###0.941
- 'd': begin // r.e.extension - any digit ('0' .. '9')
- ret := EmitNode (ANYDIGIT);
- flagp := flagp or HASWIDTH or SIMPLE;
+ 'b':
+ ret := EmitNode(OP_BOUND); // ###0.943
+ 'B':
+ ret := EmitNode(OP_NOTBOUND); // ###0.943
+ 'A':
+ ret := EmitNode(OP_BOL); // ###0.941
+ 'Z':
+ ret := EmitNode(OP_EOL); // ###0.941
+ 'd':
+ begin // r.e.extension - any digit ('0' .. '9')
+ ret := EmitNode(OP_ANYDIGIT);
+ flagp := flagp or flag_HasWidth or flag_Simple;
end;
- 'D': begin // r.e.extension - not digit ('0' .. '9')
- ret := EmitNode (NOTDIGIT);
- flagp := flagp or HASWIDTH or SIMPLE;
+ 'D':
+ begin // r.e.extension - not digit ('0' .. '9')
+ ret := EmitNode(OP_NOTDIGIT);
+ flagp := flagp or flag_HasWidth or flag_Simple;
end;
- 's': begin // r.e.extension - any space char
- {$IFDEF UseSetOfChar}
- ret := EmitRange (ANYOF);
- EmitRangeStr (SpaceChars);
- EmitRangeC (#0);
- {$ELSE}
- ret := EmitNode (ANYSPACE);
- {$ENDIF}
- flagp := flagp or HASWIDTH or SIMPLE;
+ 's':
+ begin // r.e.extension - any space char
+ ret := EmitNode(OP_ANYSPACE);
+ flagp := flagp or flag_HasWidth or flag_Simple;
end;
- 'S': begin // r.e.extension - not space char
- {$IFDEF UseSetOfChar}
- ret := EmitRange (ANYBUT);
- EmitRangeStr (SpaceChars);
- EmitRangeC (#0);
- {$ELSE}
- ret := EmitNode (NOTSPACE);
- {$ENDIF}
- flagp := flagp or HASWIDTH or SIMPLE;
+ 'S':
+ begin // r.e.extension - not space char
+ ret := EmitNode(OP_NOTSPACE);
+ flagp := flagp or flag_HasWidth or flag_Simple;
end;
- 'w': begin // r.e.extension - any english char / digit / '_'
- {$IFDEF UseSetOfChar}
- ret := EmitRange (ANYOF);
- EmitRangeStr (WordChars);
- EmitRangeC (#0);
- {$ELSE}
- ret := EmitNode (ANYLETTER);
- {$ENDIF}
- flagp := flagp or HASWIDTH or SIMPLE;
+ 'w':
+ begin // r.e.extension - any english char / digit / '_'
+ ret := EmitNode(OP_ANYLETTER);
+ flagp := flagp or flag_HasWidth or flag_Simple;
end;
- 'W': begin // r.e.extension - not english char / digit / '_'
- {$IFDEF UseSetOfChar}
- ret := EmitRange (ANYBUT);
- EmitRangeStr (WordChars);
- EmitRangeC (#0);
- {$ELSE}
- ret := EmitNode (NOTLETTER);
- {$ENDIF}
- flagp := flagp or HASWIDTH or SIMPLE;
+ 'W':
+ begin // r.e.extension - not english char / digit / '_'
+ ret := EmitNode(OP_NOTLETTER);
+ flagp := flagp or flag_HasWidth or flag_Simple;
end;
- '1' .. '9': begin //###0.936
- if (fCompModifiers and MaskModI) <> 0
- then ret := EmitNode (BSUBEXPCI)
- else ret := EmitNode (BSUBEXP);
- EmitC (REChar (ord (regparse^) - ord ('0')));
- flagp := flagp or HASWIDTH or SIMPLE;
+ 'v':
+ begin
+ ret := EmitNode(OP_ANYVERTSEP);
+ flagp := flagp or flag_HasWidth or flag_Simple;
+ end;
+ 'V':
+ begin
+ ret := EmitNode(OP_NOTVERTSEP);
+ flagp := flagp or flag_HasWidth or flag_Simple;
end;
- else EmitExactly (UnQuoteChar (regparse));
- end; { of case}
- inc (regparse);
+ 'h':
+ begin
+ ret := EmitNode(OP_ANYHORZSEP);
+ flagp := flagp or flag_HasWidth or flag_Simple;
+ end;
+ 'H':
+ begin
+ ret := EmitNode(OP_NOTHORZSEP);
+ flagp := flagp or flag_HasWidth or flag_Simple;
+ end;
+ '1' .. '9':
+ begin // ###0.936
+ if fCompModifiers.I then
+ ret := EmitNode(OP_BSUBEXPCI)
+ else
+ ret := EmitNode(OP_BSUBEXP);
+ EmitC(REChar(Ord(regparse^) - Ord('0')));
+ flagp := flagp or flag_HasWidth or flag_Simple;
+ end;
+ else
+ EmitExactly(UnQuoteChar(regparse));
+ end; { of case }
+ Inc(regparse);
end;
- else begin
- dec (regparse);
- if ((fCompModifiers and MaskModX) <> 0) and // check for eXtended syntax
- ((regparse^ = '#')
- or ({$IFDEF UniCode}StrScan (XIgnoredChars, regparse^) <> nil //###0.947
- {$ELSE}regparse^ in XIgnoredChars{$ENDIF})) then begin //###0.941 \x
- if regparse^ = '#' then begin // Skip eXtended comment
- // find comment terminator (group of \n and/or \r)
- while (regparse^ <> #0) and (regparse^ <> #$d) and (regparse^ <> #$a)
- do inc (regparse);
- while (regparse^ = #$d) or (regparse^ = #$a) // skip comment terminator
- do inc (regparse); // attempt to support different type of line separators
- end
- else begin // Skip the blanks!
- while {$IFDEF UniCode}StrScan (XIgnoredChars, regparse^) <> nil //###0.947
- {$ELSE}regparse^ in XIgnoredChars{$ENDIF}
- do inc (regparse);
- end;
- ret := EmitNode (COMMENT); // comment
+ else
+ begin
+ Dec(regparse);
+ if fCompModifiers.X and // check for eXtended syntax
+ ((regparse^ = '#') or IsIgnoredChar(regparse^)) then
+ begin // ###0.941 \x
+ if regparse^ = '#' then
+ begin // Skip eXtended comment
+ // find comment terminator (group of \n and/or \r)
+ while (regparse < fRegexEnd) and (regparse^ <> #$d) and
+ (regparse^ <> #$a) do
+ Inc(regparse);
+ while (regparse^ = #$d) or (regparse^ = #$a)
+ // skip comment terminator
+ do
+ Inc(regparse);
+ // attempt to support different type of line separators
end
- else begin
- len := strcspn (regparse, META);
- if len <= 0 then
- if regparse^ <> '{' then begin
- Error (reeRarseAtomInternalDisaster);
- EXIT;
- end
- else len := strcspn (regparse + 1, META) + 1; // bad {n,m} - compile as EXATLY
- ender := (regparse + len)^;
- if (len > 1)
- and ((ender = '*') or (ender = '+') or (ender = '?') or (ender = '{'))
- then dec (len); // Back off clear of ?+*{ operand.
- flagp := flagp or HASWIDTH;
- if len = 1
- then flagp := flagp or SIMPLE;
- if (fCompModifiers and MaskModI) <> 0
- then ret := EmitNode (EXACTLYCI)
- else ret := EmitNode (EXACTLY);
- while (len > 0)
- and (((fCompModifiers and MaskModX) = 0) or (regparse^ <> '#')) do begin
- if ((fCompModifiers and MaskModX) = 0) or not ( //###0.941
- {$IFDEF UniCode}StrScan (XIgnoredChars, regparse^) <> nil //###0.947
- {$ELSE}regparse^ in XIgnoredChars{$ENDIF} )
- then EmitC (regparse^);
- inc (regparse);
- dec (len);
+ else
+ begin // Skip the blanks!
+ while IsIgnoredChar(regparse^) do
+ Inc(regparse);
+ end;
+ ret := EmitNode(OP_COMMENT); // comment
+ end
+ else
+ begin
+ Len := FindSkippedMetaLen(regparse, fRegexEnd);
+ if Len <= 0 then
+ if regparse^ <> '{' then
+ begin
+ Error(reeRarseAtomInternalDisaster);
+ Exit;
+ end
+ else
+ Len := FindSkippedMetaLen(regparse + 1, fRegexEnd) + 1;
+ // bad {n,m} - compile as EXACTLY
+ EnderChar := (regparse + Len)^;
+ if (Len > 1) and ((EnderChar = '*') or (EnderChar = '+') or (EnderChar = '?') or (EnderChar = '{')) then
+ Dec(Len); // back off clear of ?+*{ operand.
+ flagp := flagp or flag_HasWidth;
+ if Len = 1 then
+ flagp := flagp or flag_Simple;
+ if fCompModifiers.I then
+ ret := EmitNode(OP_EXACTLYCI)
+ else
+ ret := EmitNode(OP_EXACTLY);
+ EmitInt(0);
+ while (Len > 0) and ((not fCompModifiers.X) or (regparse^ <> '#')) do
+ begin
+ if not fCompModifiers.X or not IsIgnoredChar(regparse^) then
+ begin
+ EmitC(regparse^);
+ if regcode <> @regdummy then
+ Inc(regExactlyLen^);
end;
- EmitC (#0);
- end; { of if not comment}
- end; { of case else}
- end; { of case}
+ Inc(regparse);
+ Dec(Len);
+ end;
+ end; { of if not comment }
+ end; { of case else }
+ end; { of case }
Result := ret;
- end; { of function TRegExpr.ParseAtom
---------------------------------------------------------------}
+end; { of function TRegExpr.ParseAtom
+ -------------------------------------------------------------- }
-function TRegExpr.GetCompilerErrorPos : PtrInt;
- begin
+function TRegExpr.GetCompilerErrorPos: PtrInt;
+begin
Result := 0;
- if (regexpbeg = nil) or (regparse = nil)
- then EXIT; // not in compiling mode ?
- Result := regparse - regexpbeg;
- end; { of function TRegExpr.GetCompilerErrorPos
---------------------------------------------------------------}
+ if (regexpBegin = nil) or (regparse = nil) then
+ Exit; // not in compiling mode ?
+ Result := regparse - regexpBegin;
+end; { of function TRegExpr.GetCompilerErrorPos
+ -------------------------------------------------------------- }
+{ ============================================================= }
+{ ===================== Matching section ====================== }
+{ ============================================================= }
-{=============================================================}
-{===================== Matching section ======================}
-{=============================================================}
-
-{$IFNDEF UseSetOfChar}
-function TRegExpr.StrScanCI (s : PRegExprChar; ch : REChar) : PRegExprChar; //###0.928 - now method of TRegExpr
- begin
- while (s^ <> #0) and (s^ <> ch) and (s^ <> InvertCase (ch))
- do inc (s);
- if s^ <> #0
- then Result := s
- else Result := nil;
- end; { of function TRegExpr.StrScanCI
---------------------------------------------------------------}
-{$ENDIF}
-
-function TRegExpr.regrepeat (p : PRegExprChar; AMax : PtrInt) : PtrInt;
+function TRegExpr.regrepeat(p: PRegExprChar; AMax: integer): integer;
// repeatedly match something simple, report how many
- var
- scan : PRegExprChar;
- opnd : PRegExprChar;
- TheMax : integer;
- {Ch,} InvCh : REChar; //###0.931
- sestart, seend : PRegExprChar; //###0.936
- begin
+var
+ scan: PRegExprChar;
+ opnd: PRegExprChar;
+ TheMax, NLen: integer;
+ InvChar: REChar; // ###0.931
+ GrpStart, GrpEnd: PRegExprChar; // ###0.936
+ ArrayIndex: integer;
+begin
Result := 0;
scan := reginput;
- opnd := p + REOpSz + RENextOffSz; //OPERAND
+ opnd := p + REOpSz + RENextOffSz; // OPERAND
TheMax := fInputEnd - scan;
- if TheMax > AMax
- then TheMax := AMax;
- case PREOp (p)^ of
- ANY: begin
- // note - ANYML cannot be proceeded in regrepeat because can skip
- // more than one char at once
- Result := TheMax;
- inc (scan, Result);
- end;
- EXACTLY: begin // in opnd can be only ONE char !!!
-// Ch := opnd^; // store in register //###0.931
- while (Result < TheMax) and (opnd^ = scan^) do begin
- inc (Result);
- inc (scan);
- end;
- end;
- EXACTLYCI: begin // in opnd can be only ONE char !!!
-// Ch := opnd^; // store in register //###0.931
- while (Result < TheMax) and (opnd^ = scan^) do begin // prevent unneeded InvertCase //###0.931
- inc (Result);
- inc (scan);
- end;
- if Result < TheMax then begin //###0.931
- InvCh := InvertCase (opnd^); // store in register
- while (Result < TheMax) and
- ((opnd^ = scan^) or (InvCh = scan^)) do begin
- inc (Result);
- inc (scan);
- end;
- end;
- end;
- BSUBEXP: begin //###0.936
- sestart := startp [ord (opnd^)];
- if sestart = nil
- then EXIT;
- seend := endp [ord (opnd^)];
- if seend = nil
- then EXIT;
- REPEAT
- opnd := sestart;
- while opnd < seend do begin
- if (scan >= fInputEnd) or (scan^ <> opnd^)
- then EXIT;
- inc (scan);
- inc (opnd);
- end;
- inc (Result);
- reginput := scan;
- UNTIL Result >= AMax;
- end;
- BSUBEXPCI: begin //###0.936
- sestart := startp [ord (opnd^)];
- if sestart = nil
- then EXIT;
- seend := endp [ord (opnd^)];
- if seend = nil
- then EXIT;
- REPEAT
- opnd := sestart;
- while opnd < seend do begin
- if (scan >= fInputEnd) or
- ((scan^ <> opnd^) and (scan^ <> InvertCase (opnd^)))
- then EXIT;
- inc (scan);
- inc (opnd);
- end;
- inc (Result);
- reginput := scan;
- UNTIL Result >= AMax;
- end;
- ANYDIGIT:
- while (Result < TheMax) and isDigit(Scan) do
- begin
- inc (Result);
- inc (scan);
- end;
- NOTDIGIT:
- while (Result < TheMax) and not IsDigit(Scan) do
- begin
- inc (Result);
- inc (scan);
- end;
- {$IFNDEF UseSetOfChar} //###0.929
- ANYLETTER:
- while (Result < TheMax) and IsWordChar(scan^) do //###0.940
- begin
- inc (Result);
- inc (scan);
- end;
- NOTLETTER:
- while (Result < TheMax) and not IsWordChar(scan^) do //###0.940
+ if TheMax > AMax then
+ TheMax := AMax;
+ case PREOp(p)^ of
+ OP_ANY:
+ begin
+ // note - OP_ANYML cannot be proceeded in regrepeat because can skip
+ // more than one char at once
+ Result := TheMax;
+ Inc(scan, Result);
+ end;
+ OP_EXACTLY:
+ begin // in opnd can be only ONE char !!!
+ NLen := PLongInt(opnd)^;
+ if TheMax > NLen then
+ TheMax := NLen;
+ Inc(opnd, RENumberSz);
+ while (Result < TheMax) and (opnd^ = scan^) do
begin
- inc (Result);
- inc (scan);
+ Inc(Result);
+ Inc(scan);
end;
- ANYSPACE:
- while (Result < TheMax) and IsSpaceChar(scan) do
- begin
- inc (Result);
- inc (scan);
+ end;
+ OP_EXACTLYCI:
+ begin // in opnd can be only ONE char !!!
+ NLen := PLongInt(opnd)^;
+ if TheMax > NLen then
+ TheMax := NLen;
+ Inc(opnd, RENumberSz);
+ while (Result < TheMax) and (opnd^ = scan^) do
+ begin // prevent unneeded InvertCase //###0.931
+ Inc(Result);
+ Inc(scan);
end;
- NOTSPACE:
- while (Result < TheMax) and Not IsSpaceChar(scan) do
- begin
- inc (Result);
- inc (scan);
+ if Result < TheMax then
+ begin // ###0.931
+ InvChar := InvertCase(opnd^); // store in register
+ while (Result < TheMax) and ((opnd^ = scan^) or (InvChar = scan^)) do
+ begin
+ Inc(Result);
+ Inc(scan);
+ end;
end;
- {$ENDIF}
- ANYOFTINYSET: begin
- while (Result < TheMax) and //!!!TinySet
- ((scan^ = opnd^) or (scan^ = (opnd + 1)^)
- or (scan^ = (opnd + 2)^)) do begin
- inc (Result);
- inc (scan);
- end;
- end;
- ANYBUTTINYSET: begin
- while (Result < TheMax) and //!!!TinySet
- (scan^ <> opnd^) and (scan^ <> (opnd + 1)^)
- and (scan^ <> (opnd + 2)^) do begin
- inc (Result);
- inc (scan);
- end;
- end;
- {$IFDEF UseSetOfChar} //###0.929
- ANYOFFULLSET: begin
- while (Result < TheMax) and
- (scan^ in PSetOfREChar (opnd)^) do begin
- inc (Result);
- inc (scan);
- end;
- end;
- {$ELSE}
- ANYOF:
- while (Result < TheMax) and
- (StrScan (opnd, scan^) <> nil) do begin
- inc (Result);
- inc (scan);
- end;
- ANYBUT:
- while (Result < TheMax) and
- (StrScan (opnd, scan^) = nil) do begin
- inc (Result);
- inc (scan);
- end;
- ANYOFCI:
- while (Result < TheMax) and (StrScanCI (opnd, scan^) <> nil) do begin
- inc (Result);
- inc (scan);
- end;
- ANYBUTCI:
- while (Result < TheMax) and (StrScanCI (opnd, scan^) = nil) do begin
- inc (Result);
- inc (scan);
- end;
- {$ENDIF}
- else begin // Oh dear. Called inappropriately.
+ end;
+ OP_BSUBEXP:
+ begin // ###0.936
+ ArrayIndex := GrpIndexes[Ord(opnd^)];
+ if ArrayIndex < 0 then
+ Exit;
+ GrpStart := startp[ArrayIndex];
+ if GrpStart = nil then
+ Exit;
+ GrpEnd := endp[ArrayIndex];
+ if GrpEnd = nil then
+ Exit;
+ repeat
+ opnd := GrpStart;
+ while opnd < GrpEnd do
+ begin
+ if (scan >= fInputEnd) or (scan^ <> opnd^) then
+ Exit;
+ Inc(scan);
+ Inc(opnd);
+ end;
+ Inc(Result);
+ reginput := scan;
+ until Result >= AMax;
+ end;
+ OP_BSUBEXPCI:
+ begin // ###0.936
+ ArrayIndex := GrpIndexes[Ord(opnd^)];
+ if ArrayIndex < 0 then
+ Exit;
+ GrpStart := startp[ArrayIndex];
+ if GrpStart = nil then
+ Exit;
+ GrpEnd := endp[ArrayIndex];
+ if GrpEnd = nil then
+ Exit;
+ repeat
+ opnd := GrpStart;
+ while opnd < GrpEnd do
+ begin
+ if (scan >= fInputEnd) or
+ ((scan^ <> opnd^) and (scan^ <> InvertCase(opnd^))) then
+ Exit;
+ Inc(scan);
+ Inc(opnd);
+ end;
+ Inc(Result);
+ reginput := scan;
+ until Result >= AMax;
+ end;
+ OP_ANYDIGIT:
+ while (Result < TheMax) and IsDigitChar(scan^) do
+ begin
+ Inc(Result);
+ Inc(scan);
+ end;
+ OP_NOTDIGIT:
+ while (Result < TheMax) and not IsDigitChar(scan^) do
+ begin
+ Inc(Result);
+ Inc(scan);
+ end;
+ OP_ANYLETTER:
+ while (Result < TheMax) and IsWordChar(scan^) do // ###0.940
+ begin
+ Inc(Result);
+ Inc(scan);
+ end;
+ OP_NOTLETTER:
+ while (Result < TheMax) and not IsWordChar(scan^) do // ###0.940
+ begin
+ Inc(Result);
+ Inc(scan);
+ end;
+ OP_ANYSPACE:
+ while (Result < TheMax) and IsSpaceChar(scan^) do
+ begin
+ Inc(Result);
+ Inc(scan);
+ end;
+ OP_NOTSPACE:
+ while (Result < TheMax) and not IsSpaceChar(scan^) do
+ begin
+ Inc(Result);
+ Inc(scan);
+ end;
+ OP_ANYVERTSEP:
+ while (Result < TheMax) and IsLineSeparator(scan^) do
+ begin
+ Inc(Result);
+ Inc(scan);
+ end;
+ OP_NOTVERTSEP:
+ while (Result < TheMax) and not IsLineSeparator(scan^) do
+ begin
+ Inc(Result);
+ Inc(scan);
+ end;
+ OP_ANYHORZSEP:
+ while (Result < TheMax) and IsHorzSeparator(scan^) do
+ begin
+ Inc(Result);
+ Inc(scan);
+ end;
+ OP_NOTHORZSEP:
+ while (Result < TheMax) and not IsHorzSeparator(scan^) do
+ begin
+ Inc(Result);
+ Inc(scan);
+ end;
+ OP_ANYOF:
+ while (Result < TheMax) and FindInCharClass(opnd, scan^, False) do
+ begin
+ Inc(Result);
+ Inc(scan);
+ end;
+ OP_ANYBUT:
+ while (Result < TheMax) and not FindInCharClass(opnd, scan^, False) do
+ begin
+ Inc(Result);
+ Inc(scan);
+ end;
+ OP_ANYOFCI:
+ while (Result < TheMax) and FindInCharClass(opnd, scan^, True) do
+ begin
+ Inc(Result);
+ Inc(scan);
+ end;
+ OP_ANYBUTCI:
+ while (Result < TheMax) and not FindInCharClass(opnd, scan^, True) do
+ begin
+ Inc(Result);
+ Inc(scan);
+ end;
+ else
+ begin // Oh dear. Called inappropriately.
Result := 0; // Best compromise.
- Error (reeRegRepeatCalledInappropriately);
- EXIT;
- end;
- end; { of case}
+ Error(reeRegRepeatCalledInappropriately);
+ Exit;
+ end;
+ end; { of case }
reginput := scan;
- end; { of function TRegExpr.regrepeat
---------------------------------------------------------------}
+end; { of function TRegExpr.regrepeat
+ -------------------------------------------------------------- }
-function TRegExpr.regnext (p : PRegExprChar) : PRegExprChar;
+function TRegExpr.regnext(p: PRegExprChar): PRegExprChar;
// dig the "next" pointer out of a node
- var offset : TRENextOff;
- begin
- if p = @regdummy then begin
+var
+ offset: TRENextOff;
+begin
+ if p = @regdummy then
+ begin
Result := nil;
- EXIT;
- end;
- offset := PRENextOff (AlignToPtr(p + REOpSz))^; //###0.933 inlined NEXT
- if offset = 0
- then Result := nil
- else Result := p + offset;
- end; { of function TRegExpr.regnext
---------------------------------------------------------------}
+ Exit;
+ end;
+ offset := PRENextOff(AlignToPtr(p + REOpSz))^; // ###0.933 inlined NEXT
+ if offset = 0 then
+ Result := nil
+ else
+ Result := p + offset;
+end; { of function TRegExpr.regnext
+ -------------------------------------------------------------- }
-function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean;
+function TRegExpr.MatchPrim(prog: PRegExprChar): boolean;
// recursively matching routine
// Conceptually the strategy is simple: check to see whether the current
// node matches, call self recursively to see whether the rest matches,
@@ -2874,1334 +3564,1713 @@ function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean;
// recursion, in particular by going through "ordinary" nodes (that don't
// need to know whether the rest of the match failed) by a loop instead of
// by recursion.
- Type
- TLoopStack = array [1 .. LoopStackMax] of integer;
-
- var
- scan : PRegExprChar; // Current node.
- next : PRegExprChar; // Next node.
- len : PtrInt;
- opnd : PRegExprChar;
- no : PtrInt;
- save : PRegExprChar;
- nextch : REChar;
- BracesMin, BracesMax : PtrInt; // we use integer instead of TREBracesArg for better support */+
+var
+ scan: PRegExprChar; // Current node.
+ next: PRegExprChar; // Next node.
+ Len: PtrInt;
+ opnd: PRegExprChar;
+ no: integer;
+ save: PRegExprChar;
+ nextch: REChar;
+ BracesMin, Bracesmax: integer;
+ // we use integer instead of TREBracesArg for better support */+
+ {$IFDEF ComplexBraces}
+ SavedLoopStack: TRegExprLoopStack; // :(( very bad for recursion
+ SavedLoopStackIdx: integer; // ###0.925
+ {$ENDIF}
+ bound1, bound2: boolean;
+begin
+ Result := False;
{$IFDEF ComplexBraces}
- SavedLoopStack : TloopStack; // :(( very bad for recursion
- SavedLoopStackIdx : integer; //###0.925
+ SavedLoopStack:=Default(TRegExprLoopStack);
+ SavedLoopStackIdx:=0;
{$ENDIF}
- begin
- Result := false;
scan := prog;
- SavedLoopStack:=Default(TLoopStack);
- while scan <> nil do begin
- len := PRENextOff (AlignToPtr(scan + 1))^; //###0.932 inlined regnext
- if len = 0
- then next := nil
- else next := scan + len;
-
- case scan^ of
- NOTBOUND, //###0.943 //!!! think about UseSetOfChar !!!
- BOUND:
- if (scan^ = BOUND)
- xor (
- ((reginput = fInputStart) or not IsWordChar((reginput - 1)^))
- and (reginput^ <> #0) and IsWordChar(reginput^)
- or
- (reginput <> fInputStart) and IsWordChar((reginput - 1)^)
- and ((reginput^ = #0) or not IsWordChar(reginput^)))
- then EXIT;
-
- BOL: if reginput <> fInputStart
- then EXIT;
- EOL: if reginput^ <> #0
- then EXIT;
- BOLML: if reginput > fInputStart then begin
- nextch := (reginput - 1)^;
- if (nextch <> fLinePairedSeparatorTail)
- or ((reginput - 1) <= fInputStart)
- or ((reginput - 2)^ <> fLinePairedSeparatorHead)
- then begin
- if (nextch = fLinePairedSeparatorHead)
- and (reginput^ = fLinePairedSeparatorTail)
- then EXIT; // don't stop between paired separator
- if
- {$IFNDEF UniCode}
- not (nextch in fLineSeparatorsSet)
- {$ELSE}
- (pos (nextch, fLineSeparators) <= 0)
- {$ENDIF}
- then EXIT;
- end;
- end;
- EOLML: if reginput^ <> #0 then begin
- nextch := reginput^;
- if (nextch <> fLinePairedSeparatorHead)
- or ((reginput + 1)^ <> fLinePairedSeparatorTail)
- then begin
- if (nextch = fLinePairedSeparatorTail)
- and (reginput > fInputStart)
- and ((reginput - 1)^ = fLinePairedSeparatorHead)
- then EXIT; // don't stop between paired separator
- if
- {$IFNDEF UniCode}
- not (nextch in fLineSeparatorsSet)
- {$ELSE}
- (pos (nextch, fLineSeparators) <= 0)
- {$ENDIF}
- then EXIT;
- end;
- end;
- ANY: begin
- if reginput^ = #0
- then EXIT;
- inc (reginput);
- end;
- ANYML: begin //###0.941
- if (reginput^ = #0)
- or ((reginput^ = fLinePairedSeparatorHead)
- and ((reginput + 1)^ = fLinePairedSeparatorTail))
- or {$IFNDEF UniCode} (reginput^ in fLineSeparatorsSet)
- {$ELSE} (pos (reginput^, fLineSeparators) > 0) {$ENDIF}
- then EXIT;
- inc (reginput);
- end;
- ANYDIGIT: begin
- if (reginput^ = #0) or Not IsDigit(reginput) then
- EXIT;
- inc (reginput);
- end;
- NOTDIGIT: begin
- if (reginput^ = #0) or IsDigit(reginput) then
- EXIT;
- inc (reginput);
- end;
- {$IFNDEF UseSetOfChar} //###0.929
- ANYLETTER: begin
- if (reginput^ = #0) or not IsWordChar(reginput^) //###0.943
- then EXIT;
- inc (reginput);
- end;
- NOTLETTER: begin
- if (reginput^ = #0) or IsWordChar(reginput^) //###0.943
- then EXIT;
- inc (reginput);
- end;
- ANYSPACE: begin
- if (reginput^ = #0) or not IsSpaceChar(reginput) //###0.943
- then EXIT;
- inc (reginput);
- end;
- NOTSPACE: begin
- if (reginput^ = #0) or IsSpaceChar(reginput) //###0.943
- then EXIT;
- inc (reginput);
- end;
- {$ENDIF}
- EXACTLYCI: begin
- opnd := scan + REOpSz + RENextOffSz; // OPERAND
- // Inline the first character, for speed.
- if (opnd^ <> reginput^)
- and (InvertCase (opnd^) <> reginput^)
- then EXIT;
- len := strlen (opnd);
- //###0.929 begin
- no := len;
- save := reginput;
- while no > 1 do begin
- inc (save);
- inc (opnd);
- if (opnd^ <> save^)
- and (InvertCase (opnd^) <> save^)
- then EXIT;
- dec (no);
- end;
- //###0.929 end
- inc (reginput, len);
- end;
- EXACTLY: begin
- opnd := scan + REOpSz + RENextOffSz; // OPERAND
- // Inline the first character, for speed.
- if opnd^ <> reginput^
- then EXIT;
- len := strlen (opnd);
- //###0.929 begin
- no := len;
- save := reginput;
- while no > 1 do begin
- inc (save);
- inc (opnd);
- if opnd^ <> save^
- then EXIT;
- dec (no);
- end;
- //###0.929 end
- inc (reginput, len);
- end;
- BSUBEXP: begin //###0.936
- no := ord ((scan + REOpSz + RENextOffSz)^);
- if startp [no] = nil
- then EXIT;
- if endp [no] = nil
- then EXIT;
- save := reginput;
- opnd := startp [no];
- while opnd < endp [no] do begin
- if (save >= fInputEnd) or (save^ <> opnd^)
- then EXIT;
- inc (save);
- inc (opnd);
- end;
- reginput := save;
+ while scan <> nil do
+ begin
+ Len := PRENextOff(AlignToPtr(scan + 1))^; // ###0.932 inlined regnext
+ if Len = 0 then
+ next := nil
+ else
+ next := scan + Len;
+
+ case scan^ of
+ OP_NOTBOUND,
+ OP_BOUND:
+ begin
+ bound1 := (reginput = fInputStart) or not IsWordChar((reginput - 1)^);
+ bound2 := (reginput = fInputEnd) or not IsWordChar(reginput^);
+ if (scan^ = OP_BOUND) xor (bound1 <> bound2) then
+ Exit;
+ end;
+ OP_BOL:
+ begin
+ if reginput <> fInputStart then
+ Exit;
+ end;
+ OP_EOL:
+ begin
+ if reginput < fInputEnd then
+ Exit;
+ end;
+ OP_BOLML:
+ if reginput > fInputStart then
+ begin
+ nextch := (reginput - 1)^;
+ if (nextch <> fLinePairedSeparatorTail) or
+ ((reginput - 1) <= fInputStart) or
+ ((reginput - 2)^ <> fLinePairedSeparatorHead) then
+ begin
+ if (nextch = fLinePairedSeparatorHead) and
+ (reginput^ = fLinePairedSeparatorTail) then
+ Exit; // don't stop between paired separator
+ if not IsCustomLineSeparator(nextch) then
+ Exit;
end;
- BSUBEXPCI: begin //###0.936
- no := ord ((scan + REOpSz + RENextOffSz)^);
- if startp [no] = nil
- then EXIT;
- if endp [no] = nil
- then EXIT;
- save := reginput;
- opnd := startp [no];
- while opnd < endp [no] do begin
- if (save >= fInputEnd) or
- ((save^ <> opnd^) and (save^ <> InvertCase (opnd^)))
- then EXIT;
- inc (save);
- inc (opnd);
- end;
- reginput := save;
+ end;
+ OP_EOLML:
+ if reginput < fInputEnd then
+ begin
+ nextch := reginput^;
+ if (nextch <> fLinePairedSeparatorHead) or
+ ((reginput + 1)^ <> fLinePairedSeparatorTail) then
+ begin
+ if (nextch = fLinePairedSeparatorTail) and (reginput > fInputStart)
+ and ((reginput - 1)^ = fLinePairedSeparatorHead) then
+ Exit; // don't stop between paired separator
+ if not IsCustomLineSeparator(nextch) then
+ Exit;
end;
- ANYOFTINYSET: begin
- if (reginput^ = #0) or //!!!TinySet
- ((reginput^ <> (scan + REOpSz + RENextOffSz)^)
- and (reginput^ <> (scan + REOpSz + RENextOffSz + 1)^)
- and (reginput^ <> (scan + REOpSz + RENextOffSz + 2)^))
- then EXIT;
- inc (reginput);
+ end;
+ OP_ANY:
+ begin
+ if reginput = fInputEnd then
+ Exit;
+ Inc(reginput);
+ end;
+ OP_ANYML:
+ begin // ###0.941
+ if (reginput = fInputEnd) or
+ ((reginput^ = fLinePairedSeparatorHead) and
+ ((reginput + 1)^ = fLinePairedSeparatorTail)) or
+ IsCustomLineSeparator(reginput^)
+ then
+ Exit;
+ Inc(reginput);
+ end;
+ OP_ANYDIGIT:
+ begin
+ if (reginput = fInputEnd) or not IsDigitChar(reginput^) then
+ Exit;
+ Inc(reginput);
+ end;
+ OP_NOTDIGIT:
+ begin
+ if (reginput = fInputEnd) or IsDigitChar(reginput^) then
+ Exit;
+ Inc(reginput);
+ end;
+ OP_ANYLETTER:
+ begin
+ if (reginput = fInputEnd) or not IsWordChar(reginput^) // ###0.943
+ then
+ Exit;
+ Inc(reginput);
+ end;
+ OP_NOTLETTER:
+ begin
+ if (reginput = fInputEnd) or IsWordChar(reginput^) // ###0.943
+ then
+ Exit;
+ Inc(reginput);
+ end;
+ OP_ANYSPACE:
+ begin
+ if (reginput = fInputEnd) or not IsSpaceChar(reginput^) // ###0.943
+ then
+ Exit;
+ Inc(reginput);
+ end;
+ OP_NOTSPACE:
+ begin
+ if (reginput = fInputEnd) or IsSpaceChar(reginput^) // ###0.943
+ then
+ Exit;
+ Inc(reginput);
+ end;
+ OP_ANYVERTSEP:
+ begin
+ if (reginput = fInputEnd) or not IsLineSeparator(reginput^) then
+ Exit;
+ Inc(reginput);
+ end;
+ OP_NOTVERTSEP:
+ begin
+ if (reginput = fInputEnd) or IsLineSeparator(reginput^) then
+ Exit;
+ Inc(reginput);
+ end;
+ OP_ANYHORZSEP:
+ begin
+ if (reginput = fInputEnd) or not IsHorzSeparator(reginput^) then
+ Exit;
+ Inc(reginput);
+ end;
+ OP_NOTHORZSEP:
+ begin
+ if (reginput = fInputEnd) or IsHorzSeparator(reginput^) then
+ Exit;
+ Inc(reginput);
+ end;
+ OP_EXACTLYCI:
+ begin
+ opnd := scan + REOpSz + RENextOffSz; // OPERAND
+ Len := PLongInt(opnd)^;
+ Inc(opnd, RENumberSz);
+ // Inline the first character, for speed.
+ if (opnd^ <> reginput^) and (InvertCase(opnd^) <> reginput^) then
+ Exit;
+ // ###0.929 begin
+ no := Len;
+ save := reginput;
+ while no > 1 do
+ begin
+ Inc(save);
+ Inc(opnd);
+ if (opnd^ <> save^) and (InvertCase(opnd^) <> save^) then
+ Exit;
+ Dec(no);
end;
- ANYBUTTINYSET: begin
- if (reginput^ = #0) or //!!!TinySet
- (reginput^ = (scan + REOpSz + RENextOffSz)^)
- or (reginput^ = (scan + REOpSz + RENextOffSz + 1)^)
- or (reginput^ = (scan + REOpSz + RENextOffSz + 2)^)
- then EXIT;
- inc (reginput);
+ // ###0.929 end
+ Inc(reginput, Len);
+ end;
+ OP_EXACTLY:
+ begin
+ opnd := scan + REOpSz + RENextOffSz; // OPERAND
+ Len := PLongInt(opnd)^;
+ Inc(opnd, RENumberSz);
+ // Inline the first character, for speed.
+ if opnd^ <> reginput^ then
+ Exit;
+ // ###0.929 begin
+ no := Len;
+ save := reginput;
+ while no > 1 do
+ begin
+ Inc(save);
+ Inc(opnd);
+ if opnd^ <> save^ then
+ Exit;
+ Dec(no);
end;
- {$IFDEF UseSetOfChar} //###0.929
- ANYOFFULLSET: begin
- if (reginput^ = #0)
- or not (reginput^ in PSetOfREChar (scan + REOpSz + RENextOffSz)^)
- then EXIT;
- inc (reginput);
+ // ###0.929 end
+ Inc(reginput, Len);
+ end;
+ OP_BSUBEXP:
+ begin // ###0.936
+ no := Ord((scan + REOpSz + RENextOffSz)^);
+ no := GrpIndexes[no];
+ if no < 0 then
+ Exit;
+ if startp[no] = nil then
+ Exit;
+ if endp[no] = nil then
+ Exit;
+ save := reginput;
+ opnd := startp[no];
+ while opnd < endp[no] do
+ begin
+ if (save >= fInputEnd) or (save^ <> opnd^) then
+ Exit;
+ Inc(save);
+ Inc(opnd);
end;
- {$ELSE}
- ANYOF: begin
- if (reginput^ = #0) or (StrScan (scan + REOpSz + RENextOffSz, reginput^) = nil)
- then EXIT;
- inc (reginput);
- end;
- ANYBUT: begin
- if (reginput^ = #0) or (StrScan (scan + REOpSz + RENextOffSz, reginput^) <> nil)
- then EXIT;
- inc (reginput);
- end;
- ANYOFCI: begin
- if (reginput^ = #0) or (StrScanCI (scan + REOpSz + RENextOffSz, reginput^) = nil)
- then EXIT;
- inc (reginput);
- end;
- ANYBUTCI: begin
- if (reginput^ = #0) or (StrScanCI (scan + REOpSz + RENextOffSz, reginput^) <> nil)
- then EXIT;
- inc (reginput);
- end;
- {$ENDIF}
- NOTHING: ;
- COMMENT: ;
- BACK: ;
- Succ (OPEN) .. TREOp (Ord (OPEN) + NSUBEXP - 1) : begin //###0.929
- no := ord (scan^) - ord (OPEN);
-// save := reginput;
- save := startp [no]; //###0.936
- startp [no] := reginput; //###0.936
- Result := MatchPrim (next);
- if not Result //###0.936
- then startp [no] := save;
-// if Result and (startp [no] = nil)
-// then startp [no] := save;
- // Don't set startp if some later invocation of the same
- // parentheses already has.
- EXIT;
- end;
- Succ (CLOSE) .. TREOp (Ord (CLOSE) + NSUBEXP - 1): begin //###0.929
- no := ord (scan^) - ord (CLOSE);
-// save := reginput;
- save := endp [no]; //###0.936
- endp [no] := reginput; //###0.936
- Result := MatchPrim (next);
- if not Result //###0.936
- then endp [no] := save;
-// if Result and (endp [no] = nil)
-// then endp [no] := save;
- // Don't set endp if some later invocation of the same
- // parentheses already has.
- EXIT;
- end;
- BRANCH: begin
- if (next^ <> BRANCH) // No choice.
- then next := scan + REOpSz + RENextOffSz // Avoid recursion
- else begin
- REPEAT
- save := reginput;
- Result := MatchPrim (scan + REOpSz + RENextOffSz);
- if Result
- then EXIT;
- reginput := save;
- scan := regnext (scan);
- UNTIL (scan = nil) or (scan^ <> BRANCH);
- EXIT;
- end;
- end;
- {$IFDEF ComplexBraces}
- LOOPENTRY: begin //###0.925
- no := LoopStackIdx;
- inc (LoopStackIdx);
- if LoopStackIdx > LoopStackMax then begin
- Error (reeLoopStackExceeded);
- EXIT;
- end;
- save := reginput;
- LoopStack [LoopStackIdx] := 0; // init loop counter
- Result := MatchPrim (next); // execute LOOP
- LoopStackIdx := no; // cleanup
- if Result
- then EXIT;
- reginput := save;
- EXIT;
+ reginput := save;
+ end;
+ OP_BSUBEXPCI:
+ begin // ###0.936
+ no := Ord((scan + REOpSz + RENextOffSz)^);
+ no := GrpIndexes[no];
+ if no < 0 then
+ Exit;
+ if startp[no] = nil then
+ Exit;
+ if endp[no] = nil then
+ Exit;
+ save := reginput;
+ opnd := startp[no];
+ while opnd < endp[no] do
+ begin
+ if (save >= fInputEnd) or
+ ((save^ <> opnd^) and (save^ <> InvertCase(opnd^))) then
+ Exit;
+ Inc(save);
+ Inc(opnd);
end;
- LOOP, LOOPNG: begin //###0.940
- if LoopStackIdx <= 0 then begin
- Error (reeLoopWithoutEntry);
- EXIT;
- end;
- opnd := scan + PRENextOff (AlignToPtr(scan + REOpSz + RENextOffSz + 2 * REBracesArgSz))^;
- BracesMin := PREBracesArg (AlignToInt(scan + REOpSz + RENextOffSz))^;
- BracesMax := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz + REBracesArgSz))^;
- save := reginput;
- if LoopStack [LoopStackIdx] >= BracesMin then begin // Min alredy matched - we can work
- if scan^ = LOOP then begin
- // greedy way - first try to max deep of greed ;)
- if LoopStack [LoopStackIdx] < BracesMax then begin
- inc (LoopStack [LoopStackIdx]);
- no := LoopStackIdx;
- Result := MatchPrim (opnd);
- LoopStackIdx := no;
- if Result
- then EXIT;
- reginput := save;
- end;
- dec (LoopStackIdx); // Fail. May be we are too greedy? ;)
- Result := MatchPrim (next);
- if not Result
- then reginput := save;
- EXIT;
- end
- else begin
- // non-greedy - try just now
- Result := MatchPrim (next);
- if Result
- then EXIT
- else reginput := save; // failed - move next and try again
- if LoopStack [LoopStackIdx] < BracesMax then begin
- inc (LoopStack [LoopStackIdx]);
- no := LoopStackIdx;
- Result := MatchPrim (opnd);
- LoopStackIdx := no;
- if Result
- then EXIT;
- reginput := save;
- end;
- dec (LoopStackIdx); // Failed - back up
- EXIT;
- end
- end
- else begin // first match a min_cnt times
- inc (LoopStack [LoopStackIdx]);
- no := LoopStackIdx;
- Result := MatchPrim (opnd);
- LoopStackIdx := no;
- if Result
- then EXIT;
- dec (LoopStack [LoopStackIdx]);
+ reginput := save;
+ end;
+ OP_ANYOF:
+ begin
+ if (reginput = fInputEnd) or
+ not FindInCharClass(scan + REOpSz + RENextOffSz, reginput^, False) then
+ Exit;
+ Inc(reginput);
+ end;
+ OP_ANYBUT:
+ begin
+ if (reginput = fInputEnd) or
+ FindInCharClass(scan + REOpSz + RENextOffSz, reginput^, False) then
+ Exit;
+ Inc(reginput);
+ end;
+ OP_ANYOFCI:
+ begin
+ if (reginput = fInputEnd) or
+ not FindInCharClass(scan + REOpSz + RENextOffSz, reginput^, True) then
+ Exit;
+ Inc(reginput);
+ end;
+ OP_ANYBUTCI:
+ begin
+ if (reginput = fInputEnd) or
+ FindInCharClass(scan + REOpSz + RENextOffSz, reginput^, True) then
+ Exit;
+ Inc(reginput);
+ end;
+ OP_NOTHING:
+ ;
+ OP_COMMENT:
+ ;
+ OP_BACK:
+ ;
+ Succ(OP_OPEN) .. TREOp(Ord(OP_OPEN) + NSUBEXP - 1):
+ begin // ###0.929
+ no := Ord(scan^) - Ord(OP_OPEN);
+ // save := reginput;
+ save := startp[no]; // ###0.936
+ startp[no] := reginput; // ###0.936
+ Result := MatchPrim(next);
+ if not Result // ###0.936
+ then
+ startp[no] := save;
+ // if Result and (startp [no] = nil)
+ // then startp [no] := save;
+ // Don't set startp if some later invocation of the same
+ // parentheses already has.
+ Exit;
+ end;
+ Succ(OP_CLOSE) .. TREOp(Ord(OP_CLOSE) + NSUBEXP - 1):
+ begin // ###0.929
+ no := Ord(scan^) - Ord(OP_CLOSE);
+ // save := reginput;
+ save := endp[no]; // ###0.936
+ endp[no] := reginput; // ###0.936
+ Result := MatchPrim(next);
+ if not Result // ###0.936
+ then
+ endp[no] := save;
+ // if Result and (endp [no] = nil)
+ // then endp [no] := save;
+ // Don't set endp if some later invocation of the same
+ // parentheses already has.
+ Exit;
+ end;
+ OP_BRANCH:
+ begin
+ if (next^ <> OP_BRANCH) // No choice.
+ then
+ next := scan + REOpSz + RENextOffSz // Avoid recursion
+ else
+ begin
+ repeat
+ save := reginput;
+ Result := MatchPrim(scan + REOpSz + RENextOffSz);
+ if Result then
+ Exit;
reginput := save;
- EXIT;
- end;
- end;
- {$ENDIF}
- STAR, PLUS, BRACES, STARNG, PLUSNG, BRACESNG: begin
- // Lookahead to avoid useless match attempts when we know
- // what character comes next.
- nextch := #0;
- if next^ = EXACTLY
- then nextch := (next + REOpSz + RENextOffSz)^;
- BracesMax := MaxInt; // infinite loop for * and + //###0.92
- if (scan^ = STAR) or (scan^ = STARNG)
- then BracesMin := 0 // STAR
- else if (scan^ = PLUS) or (scan^ = PLUSNG)
- then BracesMin := 1 // PLUS
- else begin // BRACES
- BracesMin := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz))^;
- BracesMax := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz + REBracesArgSz))^;
- end;
- save := reginput;
- opnd := scan + REOpSz + RENextOffSz;
- if (scan^ = BRACES) or (scan^ = BRACESNG)
- then inc (opnd, 2 * REBracesArgSz);
-
- if (scan^ = PLUSNG) or (scan^ = STARNG) or (scan^ = BRACESNG) then begin
- // non-greedy mode
- BracesMax := regrepeat (opnd, BracesMax); // don't repeat more than BracesMax
- // Now we know real Max limit to move forward (for recursion 'back up')
- // In some cases it can be faster to check only Min positions first,
- // but after that we have to check every position separtely instead
- // of fast scannig in loop.
- no := BracesMin;
- while no <= BracesMax do begin
- reginput := save + no;
- // If it could work, try it.
- if (nextch = #0) or (reginput^ = nextch) then begin
- {$IFDEF ComplexBraces}
- System.Move (LoopStack, SavedLoopStack, SizeOf (LoopStack)); //###0.925
- SavedLoopStackIdx := LoopStackIdx;
- {$ENDIF}
- if MatchPrim (next) then begin
- Result := true;
- EXIT;
- end;
- {$IFDEF ComplexBraces}
- System.Move (SavedLoopStack, LoopStack, SizeOf (LoopStack));
- LoopStackIdx := SavedLoopStackIdx;
- {$ENDIF}
- end;
- inc (no); // Couldn't or didn't - move forward.
- end; { of while}
- EXIT;
- end
- else begin // greedy mode
- no := regrepeat (opnd, BracesMax); // don't repeat more than max_cnt
- while no >= BracesMin do begin
- // If it could work, try it.
- if (nextch = #0) or (reginput^ = nextch) then begin
- {$IFDEF ComplexBraces}
- System.Move (LoopStack, SavedLoopStack, SizeOf (LoopStack)); //###0.925
- SavedLoopStackIdx := LoopStackIdx;
- {$ENDIF}
- if MatchPrim (next) then begin
- Result := true;
- EXIT;
- end;
- {$IFDEF ComplexBraces}
- System.Move (SavedLoopStack, LoopStack, SizeOf (LoopStack));
- LoopStackIdx := SavedLoopStackIdx;
- {$ENDIF}
- end;
- dec (no); // Couldn't or didn't - back up.
- reginput := save + no;
- end; { of while}
- EXIT;
- end;
+ scan := regnext(scan);
+ until (scan = nil) or (scan^ <> OP_BRANCH);
+ Exit;
end;
- EEND: begin
- Result := true; // Success!
- EXIT;
- end;
- else begin
- Error (reeMatchPrimMemoryCorruption);
- EXIT;
+ end;
+ {$IFDEF ComplexBraces}
+ OP_LOOPENTRY:
+ begin // ###0.925
+ no := LoopStackIdx;
+ Inc(LoopStackIdx);
+ if LoopStackIdx > LoopStackMax then
+ begin
+ Error(reeLoopStackExceeded);
+ Exit;
end;
- end; { of case scan^}
- scan := next;
- end; { of while scan <> nil}
-
- // We get here only if there's trouble -- normally "case EEND" is the
- // terminating point.
- Error (reeMatchPrimCorruptedPointers);
- end; { of function TRegExpr.MatchPrim
---------------------------------------------------------------}
-
-{$IFDEF UseFirstCharSet} //###0.929
-procedure TRegExpr.FillFirstCharSet (prog : PRegExprChar);
- var
- scan : PRegExprChar; // Current node.
- next : PRegExprChar; // Next node.
- opnd : PRegExprChar;
- min_cnt : integer;
- begin
- scan := prog;
- while scan <> nil do begin
- next := regnext (scan);
- case PREOp (scan)^ of
- BSUBEXP, BSUBEXPCI: begin //###0.938
- FirstCharSet := [#0 .. #255]; // :((( we cannot
- // optimize r.e. if it starts with back reference
- EXIT;
+ save := reginput;
+ LoopStack[LoopStackIdx] := 0; // init loop counter
+ Result := MatchPrim(next); // execute loop
+ LoopStackIdx := no; // cleanup
+ if Result then
+ Exit;
+ reginput := save;
+ Exit;
+ end;
+ OP_LOOP, OP_LOOPNG:
+ begin // ###0.940
+ if LoopStackIdx <= 0 then
+ begin
+ Error(reeLoopWithoutEntry);
+ Exit;
end;
- BOL, BOLML: ; // EXIT; //###0.937
- EOL, EOLML: begin //###0.948 was empty in 0.947, was EXIT in 0.937
- Include (FirstCharSet, #0);
- if ModifierM
- then begin
- opnd := PRegExprChar (LineSeparators);
- while opnd^ <> #0 do begin
- Include (FirstCharSet, opnd^);
- inc (opnd);
+ opnd := scan + PRENextOff(AlignToPtr(scan + REOpSz + RENextOffSz + 2 * REBracesArgSz))^;
+ BracesMin := PREBracesArg(AlignToInt(scan + REOpSz + RENextOffSz))^;
+ Bracesmax := PREBracesArg(AlignToPtr(scan + REOpSz + RENextOffSz + REBracesArgSz))^;
+ save := reginput;
+ if LoopStack[LoopStackIdx] >= BracesMin then
+ begin // Min alredy matched - we can work
+ if scan^ = OP_LOOP then
+ begin
+ // greedy way - first try to max deep of greed ;)
+ if LoopStack[LoopStackIdx] < Bracesmax then
+ begin
+ Inc(LoopStack[LoopStackIdx]);
+ no := LoopStackIdx;
+ Result := MatchPrim(opnd);
+ LoopStackIdx := no;
+ if Result then
+ Exit;
+ reginput := save;
end;
- end;
- EXIT;
- end;
- BOUND, NOTBOUND: ; //###0.943 ?!!
- ANY, ANYML: begin // we can better define ANYML !!!
- FirstCharSet := [#0 .. #255]; //###0.930
- EXIT;
- end;
- ANYDIGIT: begin
- FirstCharSet := FirstCharSet + ['0' .. '9'];
- EXIT;
- end;
- NOTDIGIT: begin
- FirstCharSet := FirstCharSet + ([#0 .. #255] - ['0' .. '9']); //###0.948 FirstCharSet was forgotten
- EXIT;
- end;
- EXACTLYCI: begin
- Include (FirstCharSet, (scan + REOpSz + RENextOffSz)^);
- Include (FirstCharSet, InvertCase ((scan + REOpSz + RENextOffSz)^));
- EXIT;
- end;
- EXACTLY: begin
- Include (FirstCharSet, (scan + REOpSz + RENextOffSz)^);
- EXIT;
- end;
- ANYOFFULLSET: begin
- FirstCharSet := FirstCharSet + PSetOfREChar (scan + REOpSz + RENextOffSz)^;
- EXIT;
- end;
- ANYOFTINYSET: begin
- //!!!TinySet
- Include (FirstCharSet, (scan + REOpSz + RENextOffSz)^);
- Include (FirstCharSet, (scan + REOpSz + RENextOffSz + 1)^);
- Include (FirstCharSet, (scan + REOpSz + RENextOffSz + 2)^);
- // ... // up to TinySetLen
- EXIT;
- end;
- ANYBUTTINYSET: begin
- //!!!TinySet
- FirstCharSet := FirstCharSet + ([#0 .. #255] - [ //###0.948 FirstCharSet was forgotten
- (scan + REOpSz + RENextOffSz)^,
- (scan + REOpSz + RENextOffSz + 1)^,
- (scan + REOpSz + RENextOffSz + 2)^]);
- // ... // up to TinySetLen
- EXIT;
- end;
- NOTHING: ;
- COMMENT: ;
- BACK: ;
- Succ (OPEN) .. TREOp (Ord (OPEN) + NSUBEXP - 1) : begin //###0.929
- FillFirstCharSet (next);
- EXIT;
- end;
- Succ (CLOSE) .. TREOp (Ord (CLOSE) + NSUBEXP - 1): begin //###0.929
- FillFirstCharSet (next);
- EXIT;
- end;
- BRANCH: begin
- if (PREOp (next)^ <> BRANCH) // No choice.
- then next := scan + REOpSz + RENextOffSz // Avoid recursion.
- else begin
- REPEAT
- FillFirstCharSet (scan + REOpSz + RENextOffSz);
- scan := regnext (scan);
- UNTIL (scan = nil) or (PREOp (scan)^ <> BRANCH);
- EXIT;
+ Dec(LoopStackIdx); // Fail. May be we are too greedy? ;)
+ Result := MatchPrim(next);
+ if not Result then
+ reginput := save;
+ Exit;
+ end
+ else
+ begin
+ // non-greedy - try just now
+ Result := MatchPrim(next);
+ if Result then
+ Exit
+ else
+ reginput := save; // failed - move next and try again
+ if LoopStack[LoopStackIdx] < Bracesmax then
+ begin
+ Inc(LoopStack[LoopStackIdx]);
+ no := LoopStackIdx;
+ Result := MatchPrim(opnd);
+ LoopStackIdx := no;
+ if Result then
+ Exit;
+ reginput := save;
end;
- end;
- {$IFDEF ComplexBraces}
- LOOPENTRY: begin //###0.925
-// LoopStack [LoopStackIdx] := 0; //###0.940 line removed
- FillFirstCharSet (next); // execute LOOP
- EXIT;
- end;
- LOOP, LOOPNG: begin //###0.940
- opnd := scan + PRENextOff (AlignToPtr(scan + REOpSz + RENextOffSz + REBracesArgSz * 2))^;
- min_cnt := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz))^;
- FillFirstCharSet (opnd);
- if min_cnt = 0
- then FillFirstCharSet (next);
- EXIT;
- end;
- {$ENDIF}
- STAR, STARNG: //###0.940
- FillFirstCharSet (scan + REOpSz + RENextOffSz);
- PLUS, PLUSNG: begin //###0.940
- FillFirstCharSet (scan + REOpSz + RENextOffSz);
- EXIT;
+ Dec(LoopStackIdx); // Failed - back up
+ Exit;
+ end
+ end
+ else
+ begin // first match a min_cnt times
+ Inc(LoopStack[LoopStackIdx]);
+ no := LoopStackIdx;
+ Result := MatchPrim(opnd);
+ LoopStackIdx := no;
+ if Result then
+ Exit;
+ Dec(LoopStack[LoopStackIdx]);
+ reginput := save;
+ Exit;
end;
- BRACES, BRACESNG: begin //###0.940
- opnd := scan + REOpSz + RENextOffSz + REBracesArgSz * 2;
- min_cnt := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz))^; // BRACES
- FillFirstCharSet (opnd);
- if min_cnt > 0
- then EXIT;
+ end;
+ {$ENDIF}
+ OP_STAR, OP_PLUS, OP_BRACES, OP_STARNG, OP_PLUSNG, OP_BRACESNG:
+ begin
+ // Lookahead to avoid useless match attempts when we know
+ // what character comes next.
+ nextch := #0;
+ if next^ = OP_EXACTLY then
+ nextch := (next + REOpSz + RENextOffSz + RENumberSz)^;
+ Bracesmax := MaxInt; // infinite loop for * and + //###0.92
+ if (scan^ = OP_STAR) or (scan^ = OP_STARNG) then
+ BracesMin := 0 // star
+ else if (scan^ = OP_PLUS) or (scan^ = OP_PLUSNG) then
+ BracesMin := 1 // plus
+ else
+ begin // braces
+ BracesMin := PREBracesArg(AlignToPtr(scan + REOpSz + RENextOffSz))^;
+ Bracesmax := PREBracesArg(AlignToPtr(scan + REOpSz + RENextOffSz + REBracesArgSz))^;
end;
- EEND: begin
- FirstCharSet := [#0 .. #255]; //###0.948
- EXIT;
- end;
- else begin
- Error (reeMatchPrimMemoryCorruption);
- EXIT;
+ save := reginput;
+ opnd := scan + REOpSz + RENextOffSz;
+ if (scan^ = OP_BRACES) or (scan^ = OP_BRACESNG) then
+ Inc(opnd, 2 * REBracesArgSz);
+
+ if (scan^ = OP_PLUSNG) or (scan^ = OP_STARNG) or (scan^ = OP_BRACESNG) then
+ begin
+ // non-greedy mode
+ Bracesmax := regrepeat(opnd, Bracesmax);
+ // don't repeat more than BracesMax
+ // Now we know real Max limit to move forward (for recursion 'back up')
+ // In some cases it can be faster to check only Min positions first,
+ // but after that we have to check every position separtely instead
+ // of fast scannig in loop.
+ no := BracesMin;
+ while no <= Bracesmax do
+ begin
+ reginput := save + no;
+ // If it could work, try it.
+ if (nextch = #0) or (reginput^ = nextch) then
+ begin
+ {$IFDEF ComplexBraces}
+ System.Move(LoopStack, SavedLoopStack, SizeOf(LoopStack));
+ // ###0.925
+ SavedLoopStackIdx := LoopStackIdx;
+ {$ENDIF}
+ if MatchPrim(next) then
+ begin
+ Result := True;
+ Exit;
+ end;
+ {$IFDEF ComplexBraces}
+ System.Move(SavedLoopStack, LoopStack, SizeOf(LoopStack));
+ LoopStackIdx := SavedLoopStackIdx;
+ {$ENDIF}
+ end;
+ Inc(no); // Couldn't or didn't - move forward.
+ end; { of while }
+ Exit;
+ end
+ else
+ begin // greedy mode
+ no := regrepeat(opnd, Bracesmax); // don't repeat more than max_cnt
+ while no >= BracesMin do
+ begin
+ // If it could work, try it.
+ if (nextch = #0) or (reginput^ = nextch) then
+ begin
+ {$IFDEF ComplexBraces}
+ System.Move(LoopStack, SavedLoopStack, SizeOf(LoopStack));
+ // ###0.925
+ SavedLoopStackIdx := LoopStackIdx;
+ {$ENDIF}
+ if MatchPrim(next) then
+ begin
+ Result := True;
+ Exit;
+ end;
+ {$IFDEF ComplexBraces}
+ System.Move(SavedLoopStack, LoopStack, SizeOf(LoopStack));
+ LoopStackIdx := SavedLoopStackIdx;
+ {$ENDIF}
+ end;
+ Dec(no); // Couldn't or didn't - back up.
+ reginput := save + no;
+ end; { of while }
+ Exit;
end;
- end; { of case scan^}
- scan := next;
- end; { of while scan <> nil}
- end; { of procedure FillFirstCharSet
---------------------------------------------------------------}
-{$ENDIF}
+ end;
+ OP_EEND:
+ begin
+ Result := True; // Success!
+ Exit;
+ end;
+ else
+ begin
+ Error(reeMatchPrimMemoryCorruption);
+ Exit;
+ end;
+ end; { of case scan^ }
+ scan := next;
+ end; { of while scan <> nil }
-function TRegExpr.Exec (const AInputString : RegExprString) : boolean;
- begin
+ // We get here only if there's trouble -- normally "case EEND" is the
+ // terminating point.
+ Error(reeMatchPrimCorruptedPointers);
+end; { of function TRegExpr.MatchPrim
+ -------------------------------------------------------------- }
+
+function TRegExpr.Exec(const AInputString: RegExprString): boolean;
+begin
InputString := AInputString;
- Result := ExecPrim (1);
- end; { of function TRegExpr.Exec
---------------------------------------------------------------}
+ Result := ExecPrim(1, False, False);
+end; { of function TRegExpr.Exec
+ -------------------------------------------------------------- }
-function TRegExpr.Exec : boolean;
- begin
- Result := ExecPrim (1);
- end; { of function TRegExpr.Exec
---------------------------------------------------------------}
-function TRegExpr.Exec (AOffset: PtrInt) : boolean;
- begin
- Result := ExecPrim (AOffset);
- end; { of function TRegExpr.Exec
---------------------------------------------------------------}
+function TRegExpr.Exec: boolean;
+var
+ SlowChecks: boolean;
+begin
+ SlowChecks := Length(fInputString) < fSlowChecksSizeMax;
+ Result := ExecPrim(1, False, SlowChecks);
+end; { of function TRegExpr.Exec
+ -------------------------------------------------------------- }
+function TRegExpr.Exec(AOffset: integer): boolean;
+begin
+ Result := ExecPrim(AOffset, False, False);
+end; { of function TRegExpr.Exec
+ -------------------------------------------------------------- }
-function TRegExpr.ExecPos (AOffset: PtrInt {$IFDEF DefParam}= 1{$ENDIF}) : boolean;
- begin
- Result := ExecPrim (AOffset);
- end; { of function TRegExpr.ExecPos
---------------------------------------------------------------}
-function TRegExpr.ExecPrim (AOffset: PtrInt) : boolean;
- procedure ClearMatchs;
- // Clears matchs array
- var i : integer;
- begin
- for i := 0 to NSUBEXP - 1 do begin
- startp [i] := nil;
- endp [i] := nil;
- end;
- end; { of procedure ClearMatchs;
-..............................................................}
- function RegMatch (str : PRegExprChar) : boolean;
- // try match at specific point
+function TRegExpr.ExecPos(AOffset: integer = 1): boolean;
+begin
+ Result := ExecPrim(AOffset, False, False);
+end; { of function TRegExpr.ExecPos
+ -------------------------------------------------------------- }
+
+
+function TRegExpr.ExecPos(AOffset: integer; ATryOnce: boolean): boolean;
+begin
+ Result := ExecPrim(AOffset, ATryOnce, False);
+end;
+
+
+function TRegExpr.MatchAtOnePos(APos: PRegExprChar): boolean;
+begin
+ reginput := APos;
+ Result := MatchPrim(programm + REOpSz);
+ if Result then
begin
- //###0.949 removed clearing of start\endp
- reginput := str;
- Result := MatchPrim (programm + REOpSz);
- if Result then begin
- startp [0] := str;
- endp [0] := reginput;
- end;
- end; { of function RegMatch
-..............................................................}
- var
- s : PRegExprChar;
- StartPtr: PRegExprChar;
- InputLen : PtrInt;
- begin
- Result := false; // Be paranoid...
-
- ClearMatchs; //###0.949
- // ensure that Match cleared either if optimization tricks or some error
+ startp[0] := APos;
+ endp[0] := reginput;
+ end;
+end;
+
+procedure TRegExpr.ClearMatches;
+begin
+ FillChar(startp, SizeOf(startp), 0);
+ FillChar(endp, SizeOf(endp), 0);
+end;
+
+procedure TRegExpr.ClearInternalIndexes;
+var
+ i: integer;
+begin
+ FillChar(startp, SizeOf(startp), 0);
+ FillChar(endp, SizeOf(endp), 0);
+ for i := 0 to NSUBEXP - 1 do
+ GrpIndexes[i] := -1;
+ GrpIndexes[0] := 0;
+ GrpCount := 0;
+end;
+
+function TRegExpr.ExecPrim(AOffset: integer; ATryOnce, ASlowChecks: boolean): boolean;
+var
+ Ptr: PRegExprChar;
+begin
+ Result := False;
+
+ // Ensure that Match cleared either if optimization tricks or some error
// will lead to leaving ExecPrim without actual search. That is
- // importent for ExecNext logic and so on.
+ // important for ExecNext logic and so on.
+ ClearMatches;
- if not IsProgrammOk //###0.929
- then EXIT;
+ // Don't check IsProgrammOk here! it causes big slowdown in test_benchmark!
+ if programm = nil then
+ begin
+ Compile;
+ if programm = nil then
+ Exit;
+ end;
// Check InputString presence
- if not Assigned (fInputString) then begin
- Error (reeNoInputStringSpecified);
- EXIT;
- end;
-
- InputLen := length (fInputString);
-
- //Check that the start position is not negative
- if AOffset < 1 then begin
- Error (reeOffsetMustBeGreaterThen0);
- EXIT;
- end;
+ if fInputString = '' then
+ begin
+ Error(reeNoInputStringSpecified);
+ Exit;
+ end;
+
+ // Check that the start position is not negative
+ if AOffset < 1 then
+ begin
+ Error(reeOffsetMustBePositive);
+ Exit;
+ end;
+
// Check that the start position is not longer than the line
// If so then exit with nothing found
- if AOffset > (InputLen + 1) // for matching empty string after last char.
- then EXIT;
+ if AOffset > (Length(fInputString) + 1) // for matching empty string after last char.
+ then
+ Exit;
- StartPtr := fInputString + AOffset - 1;
+ Ptr := fInputStart + AOffset - 1;
// If there is a "must appear" string, look for it.
- if regmust <> nil then begin
- s := StartPtr;
- REPEAT
- s := StrScan (s, regmust [0]);
- if s <> nil then begin
- if StrLComp (s, regmust, regmlen) = 0
- then BREAK; // Found it.
- inc (s);
- end;
- UNTIL s = nil;
- if s = nil // Not present.
- then EXIT;
- end;
-
- // Mark beginning of line for ^ .
- fInputStart := fInputString;
-
- // Pointer to end of input stream - for
- // pascal-style string processing (may include #0)
- fInputEnd := fInputString + InputLen;
+ if ASlowChecks then
+ if regmustString <> '' then
+ if Pos(regmustString, fInputString) = 0 then Exit;
{$IFDEF ComplexBraces}
// no loops started
- LoopStackIdx := 0; //###0.925
+ LoopStackIdx := 0; // ###0.925
{$ENDIF}
- // Simplest case: anchored match need be tried only once.
- if reganch <> #0 then begin
- Result := RegMatch (StartPtr);
- EXIT;
- end;
-
- // Messy cases: unanchored match.
- s := StartPtr;
- if regstart <> #0 then // We know what char it must start with.
- REPEAT
- s := StrScan (s, regstart);
- if s <> nil then begin
- Result := RegMatch (s);
- if Result
- then EXIT
- else ClearMatchs; //###0.949
- inc (s);
- end;
- UNTIL s = nil
- else begin // We don't - general case.
- repeat //###0.948
- {$IFDEF UseFirstCharSet}
- if s^ in FirstCharSet
- then Result := RegMatch (s);
- {$ELSE}
- Result := RegMatch (s);
- {$ENDIF}
- if Result or (s^ = #0) // Exit on a match or after testing the end-of-string.
- then EXIT
- else ClearMatchs; //###0.949
- inc (s);
- until false;
-(* optimized and fixed by Martin Fuller - empty strings
- were not allowed to pass through in UseFirstCharSet mode
- {$IFDEF UseFirstCharSet} //###0.929
- while s^ <> #0 do begin
- if s^ in FirstCharSet
- then Result := RegMatch (s);
- if Result
- then EXIT;
- inc (s);
- end;
- {$ELSE}
- REPEAT
- Result := RegMatch (s);
- if Result
- then EXIT;
- inc (s);
- UNTIL s^ = #0;
- {$ENDIF}
-*)
- end;
- // Failure
- end; { of function TRegExpr.ExecPrim
---------------------------------------------------------------}
+ // ATryOnce or anchored match (it needs to be tried only once).
+ if ATryOnce or (reganchored <> #0) then
+ begin
+ {$IFDEF UseFirstCharSet}
+ {$IFDEF UniCode}
+ if Ord(Ptr^) <= $FF then
+ {$ENDIF}
+ if not FirstCharArray[byte(Ptr^)] then
+ Exit;
+ {$ENDIF}
+ Result := MatchAtOnePos(Ptr);
+ Exit;
+ end;
-function TRegExpr.ExecNext : boolean;
- var offset : PtrInt;
- begin
- Result := false;
- if not Assigned (startp[0]) or not Assigned (endp[0]) then begin
- Error (reeExecNextWithoutExec);
- EXIT;
- end;
-// Offset := MatchPos [0] + MatchLen [0];
-// if MatchLen [0] = 0
- Offset := endp [0] - fInputString + 1; //###0.929
- if endp [0] = startp [0] //###0.929
- then inc (Offset); // prevent infinite looping if empty string match r.e.
- Result := ExecPrim (Offset);
- end; { of function TRegExpr.ExecNext
---------------------------------------------------------------}
+ // Messy cases: unanchored match.
+ Dec(Ptr);
+ repeat
+ Inc(Ptr);
+ if Ptr > fInputEnd then
+ Exit;
-function TRegExpr.GetInputString : RegExprString;
- begin
- if not Assigned (fInputString) then begin
- Error (reeGetInputStringWithoutInputString);
- EXIT;
- end;
- Result := fInputString;
- end; { of function TRegExpr.GetInputString
---------------------------------------------------------------}
+ {$IFDEF UseFirstCharSet}
+ {$IFDEF UniCode}
+ if Ord(Ptr^) <= $FF then
+ {$ENDIF}
+ if not FirstCharArray[byte(Ptr^)] then
+ Continue;
+ {$ENDIF}
-procedure TRegExpr.SetInputString (const AInputString : RegExprString);
- var
- Len : PtrInt;
- i : PtrInt;
- begin
- // clear Match* - before next Exec* call it's undefined
- for i := 0 to NSUBEXP - 1 do begin
- startp [i] := nil;
- endp [i] := nil;
- end;
-
- // need reallocation of input string buffer ?
- Len := length (AInputString);
- ReAllocMem(fInputString,(Len + 1) * SizeOf (REChar));
- // copy input string into buffer
- if Len>0 then
- System.Move(AInputString[1],fInputString^,(Len+1)* SizeOf (REChar)) // with #0
- else
- fInputString[0]:=#0;
+ Result := MatchAtOnePos(Ptr);
+ // Exit on a match or after testing the end-of-string
+ if Result then
+ Exit;
+ until False;
+end; { of function TRegExpr.ExecPrim
+ -------------------------------------------------------------- }
- {
- fInputString : string;
- fInputStart, fInputEnd : PRegExprChar;
+function TRegExpr.ExecNext: boolean;
+var
+ PtrBegin, PtrEnd: PRegExprChar;
+ Offset: PtrInt;
+begin
+ PtrBegin := startp[0];
+ PtrEnd := endp[0];
+ if (PtrBegin = nil) or (PtrEnd = nil) then
+ begin
+ Error(reeExecNextWithoutExec);
+ Result := False;
+ Exit;
+ end;
+
+ Offset := PtrEnd - fInputStart + 1;
+ // prevent infinite looping if empty string matches r.e.
+ if PtrBegin = PtrEnd then
+ Inc(Offset);
+
+ Result := ExecPrim(Offset, False, False);
+end; { of function TRegExpr.ExecNext
+ -------------------------------------------------------------- }
+
+procedure TRegExpr.SetInputString(const AInputString: RegExprString);
+begin
+ ClearMatches;
- SetInputString:
fInputString := AInputString;
- UniqueString (fInputString);
- fInputStart := PChar (fInputString);
- Len := length (fInputString);
- fInputEnd := PRegExprChar (integer (fInputStart) + Len); ??
- !! startp/endp still dangerous to use ?
- }
- end; { of procedure TRegExpr.SetInputString
---------------------------------------------------------------}
+ UniqueString(fInputString);
+
+ fInputStart := PRegExprChar(fInputString);
+ fInputEnd := fInputStart + Length(fInputString);
+end; { of procedure TRegExpr.SetInputString
+ -------------------------------------------------------------- }
-procedure TRegExpr.SetLineSeparators (const AStr : RegExprString);
- begin
- if AStr <> fLineSeparators then begin
+procedure TRegExpr.SetLineSeparators(const AStr: RegExprString);
+begin
+ if AStr <> fLineSeparators then
+ begin
fLineSeparators := AStr;
+ InitLineSepArray;
InvalidateProgramm;
- end;
- end; { of procedure TRegExpr.SetLineSeparators
---------------------------------------------------------------}
+ end;
+end; { of procedure TRegExpr.SetLineSeparators
+ -------------------------------------------------------------- }
-procedure TRegExpr.SetLinePairedSeparator (const AStr : RegExprString);
- begin
- if length (AStr) = 2 then begin
- if AStr [1] = AStr [2] then begin
+procedure TRegExpr.SetLinePairedSeparator(const AStr: RegExprString);
+begin
+ if Length(AStr) = 2 then
+ begin
+ if AStr[1] = AStr[2] then
+ begin
// it's impossible for our 'one-point' checking to support
// two chars separator for identical chars
- Error (reeBadLinePairedSeparator);
- EXIT;
- end;
- if not fLinePairedSeparatorAssigned
- or (AStr [1] <> fLinePairedSeparatorHead)
- or (AStr [2] <> fLinePairedSeparatorTail) then begin
- fLinePairedSeparatorAssigned := true;
- fLinePairedSeparatorHead := AStr [1];
- fLinePairedSeparatorTail := AStr [2];
- InvalidateProgramm;
- end;
- end
- else if length (AStr) = 0 then begin
- if fLinePairedSeparatorAssigned then begin
- fLinePairedSeparatorAssigned := false;
- InvalidateProgramm;
- end;
- end
- else Error (reeBadLinePairedSeparator);
- end; { of procedure TRegExpr.SetLinePairedSeparator
---------------------------------------------------------------}
+ Error(reeBadLinePairedSeparator);
+ Exit;
+ end;
+ if not fLinePairedSeparatorAssigned or (AStr[1] <> fLinePairedSeparatorHead)
+ or (AStr[2] <> fLinePairedSeparatorTail) then
+ begin
+ fLinePairedSeparatorAssigned := True;
+ fLinePairedSeparatorHead := AStr[1];
+ fLinePairedSeparatorTail := AStr[2];
+ InvalidateProgramm;
+ end;
+ end
+ else if Length(AStr) = 0 then
+ begin
+ if fLinePairedSeparatorAssigned then
+ begin
+ fLinePairedSeparatorAssigned := False;
+ InvalidateProgramm;
+ end;
+ end
+ else
+ Error(reeBadLinePairedSeparator);
+end; { of procedure TRegExpr.SetLinePairedSeparator
+ -------------------------------------------------------------- }
-function TRegExpr.GetLinePairedSeparator : RegExprString;
- begin
- if fLinePairedSeparatorAssigned then begin
- {$IFDEF UniCode}
- // Here is some UniCode 'magic'
- // If You do know better decision to concatenate
- // two WideChars, please, let me know!
- Result := fLinePairedSeparatorHead; //###0.947
- Result := Result + fLinePairedSeparatorTail;
- {$ELSE}
- Result := fLinePairedSeparatorHead + fLinePairedSeparatorTail;
- {$ENDIF}
- end
- else Result := '';
- end; { of function TRegExpr.GetLinePairedSeparator
---------------------------------------------------------------}
+function TRegExpr.GetLinePairedSeparator: RegExprString;
+begin
+ if fLinePairedSeparatorAssigned then
+ begin
+ {$IFDEF UniCode}
+ // Here is some UniCode 'magic'
+ // If You do know better decision to concatenate
+ // two WideChars, please, let me know!
+ Result := fLinePairedSeparatorHead; // ###0.947
+ Result := Result + fLinePairedSeparatorTail;
+ {$ELSE}
+ Result := fLinePairedSeparatorHead + fLinePairedSeparatorTail;
+ {$ENDIF}
+ end
+ else
+ Result := '';
+end; { of function TRegExpr.GetLinePairedSeparator
+ -------------------------------------------------------------- }
-function TRegExpr.Substitute (const ATemplate : RegExprString) : RegExprString;
+function TRegExpr.Substitute(const ATemplate: RegExprString): RegExprString;
// perform substitutions after a regexp match
-// completely rewritten in 0.929
-type
- TSubstMode = (smodeNormal, smodeOneUpper, smodeOneLower, smodeAllUpper,
- smodeAllLower);
var
- TemplateLen : PtrInt;
- TemplateBeg, TemplateEnd : PRegExprChar;
- p, p0, p1, ResultPtr : PRegExprChar;
- ResultLen : PtrInt;
- n : PtrInt;
- Ch : REChar;
- Mode: TSubstMode;
- QuotedChar: REChar;
+ TemplateBeg, TemplateEnd: PRegExprChar;
- function ParseVarName (var APtr : PRegExprChar) : PtrInt;
+ function ParseVarName(var APtr: PRegExprChar): integer;
// extract name of variable (digits, may be enclosed with
// curly braces) from APtr^, uses TemplateEnd !!!
var
- p : PRegExprChar;
- Delimited : boolean;
+ p: PRegExprChar;
+ Delimited: boolean;
begin
- Result := 0;
- p := APtr;
- Delimited := (p < TemplateEnd) and (p^ = '{');
- if Delimited
- then inc (p); // skip left curly brace
- if (p < TemplateEnd) and (p^ = '&')
- then inc (p) // this is '$&' or '${&}'
+ Result := 0;
+ p := APtr;
+ Delimited := (p < TemplateEnd) and (p^ = '{');
+ if Delimited then
+ Inc(p); // skip left curly brace
+ if (p < TemplateEnd) and (p^ = '&') then
+ Inc(p) // this is '$&' or '${&}'
else
- while (p < TemplateEnd) and IsDigit(p) do
- begin
- Result := Result * 10 + (ord (p^) - ord ('0')); //###0.939
- inc (p);
+ while (p < TemplateEnd) and IsDigitChar(p^) do
+ begin
+ Result := Result * 10 + (Ord(p^) - Ord('0')); // ###0.939
+ Inc(p);
end;
- if Delimited then
- if (p < TemplateEnd) and (p^ = '}')
- then inc (p) // skip right curly brace
- else p := APtr; // isn't properly terminated
- if p = APtr
- then Result := -1; // no valid digits found or no right curly brace
- APtr := p;
+ if Delimited then
+ if (p < TemplateEnd) and (p^ = '}') then
+ Inc(p) // skip right curly brace
+ else
+ p := APtr; // isn't properly terminated
+ if p = APtr then
+ Result := -1; // no valid digits found or no right curly brace
+ APtr := p;
end;
+type
+ TSubstMode = (smodeNormal, smodeOneUpper, smodeOneLower, smodeAllUpper, smodeAllLower);
+var
+ Mode: TSubstMode;
+ p, p0, p1, ResultPtr: PRegExprChar;
+ ResultLen, n: integer;
+ Ch, QuotedChar: REChar;
begin
// Check programm and input string
- if not IsProgrammOk
- then EXIT;
- if not Assigned (fInputString) then begin
- Error (reeNoInputStringSpecified);
- EXIT;
- end;
+ if not IsProgrammOk then
+ Exit;
+ if fInputString = '' then
+ begin
+ Error(reeNoInputStringSpecified);
+ Exit;
+ end;
// Prepare for working
- TemplateLen := length (ATemplate);
- if TemplateLen = 0 then begin // prevent nil pointers
+ if ATemplate = '' then
+ begin // prevent nil pointers
Result := '';
- EXIT;
- end;
- TemplateBeg := pointer (ATemplate);
- TemplateEnd := TemplateBeg + TemplateLen;
+ Exit;
+ end;
+ TemplateBeg := PRegExprChar(ATemplate);
+ TemplateEnd := TemplateBeg + Length(ATemplate);
// Count result length for speed optimization.
ResultLen := 0;
p := TemplateBeg;
- while p < TemplateEnd do begin
+ while p < TemplateEnd do
+ begin
Ch := p^;
- inc (p);
- if Ch = '$'
- then n := ParseVarName (p)
- else n := -1;
- if n >= 0 then begin
- if (n < NSUBEXP) and Assigned (startp [n]) and Assigned (endp [n])
- then inc (ResultLen, endp [n] - startp [n]);
+ Inc(p);
+ if Ch = '$' then
+ n := GrpIndexes[ParseVarName(p)]
+ else
+ n := -1;
+ if n >= 0 then
+ begin
+ Inc(ResultLen, endp[n] - startp[n]);
end
- else begin
- if (Ch = EscChar) and (p < TemplateEnd) then begin // quoted or special char followed
+ else
+ begin
+ if (Ch = EscChar) and (p < TemplateEnd) then
+ begin // quoted or special char followed
Ch := p^;
- inc (p);
+ Inc(p);
case Ch of
- 'n': inc(ResultLen, Length(FReplaceLineEnd));
- 'u', 'l', 'U', 'L': {nothing};
- 'x': begin
- inc(ResultLen);
- if (p^ = '{') then begin // skip \x{....}
- while ((p^ <> '}') and (p < TemplateEnd)) do
- p := p + 1;
- p := p + 1;
- end
- else
- p := p + 2 // skip \x..
+ 'n':
+ Inc(ResultLen, Length(FReplaceLineEnd));
+ 'u', 'l', 'U', 'L': { nothing }
+ ;
+ 'x':
+ begin
+ Inc(ResultLen);
+ if (p^ = '{') then
+ begin // skip \x{....}
+ while ((p^ <> '}') and (p < TemplateEnd)) do
+ p := p + 1;
+ p := p + 1;
+ end
+ else
+ p := p + 2 // skip \x..
end;
- else inc(ResultLen);
+ else
+ Inc(ResultLen);
end;
end
else
- inc(ResultLen);
+ Inc(ResultLen);
end;
end;
// Get memory. We do it once and it significant speed up work !
- if ResultLen = 0 then begin
+ if ResultLen = 0 then
+ begin
Result := '';
- EXIT;
- end;
- //SetString (Result, nil, ResultLen);
- SetLength(Result,ResultLen);
+ Exit;
+ end;
+ SetLength(Result, ResultLen);
// Fill Result
- ResultPtr := pointer (Result);
+ ResultPtr := Pointer(Result);
p := TemplateBeg;
Mode := smodeNormal;
- while p < TemplateEnd do begin
+ while p < TemplateEnd do
+ begin
Ch := p^;
p0 := p;
- inc (p);
+ Inc(p);
p1 := p;
- if Ch = '$'
- then n := ParseVarName (p)
- else n := -1;
- if (n >= 0) then begin
+ if Ch = '$' then
+ n := GrpIndexes[ParseVarName(p)]
+ else
+ n := -1;
+ if (n >= 0) then
+ begin
p0 := startp[n];
p1 := endp[n];
- if (n >= NSUBEXP) or not Assigned (p0) or not Assigned (endp [n]) then
- p1 := p0; // empty
end
- else begin
- if (Ch = EscChar) and (p < TemplateEnd) then begin // quoted or special char followed
+ else
+ begin
+ if (Ch = EscChar) and (p < TemplateEnd) then
+ begin // quoted or special char followed
Ch := p^;
- inc (p);
+ Inc(p);
case Ch of
- 'n' : begin
- p0 := @FReplaceLineEnd[1];
+ 'n':
+ begin
+ p0 := PRegExprChar(FReplaceLineEnd);
p1 := p0 + Length(FReplaceLineEnd);
end;
- 'x', 't', 'r', 'f', 'a', 'e': begin
- p := p - 1; // UnquoteChar expects the escaped char under the pointer
- QuotedChar := UnquoteChar(p);
- p := p + 1; // Skip after last part of the escaped sequence - UnquoteChar stops on the last symbol of it
- p0 := @QuotedChar;
- p1 := p0 + 1;
+ 'x', 't', 'r', 'f', 'a', 'e':
+ begin
+ p := p - 1;
+ // UnquoteChar expects the escaped char under the pointer
+ QuotedChar := UnQuoteChar(p);
+ p := p + 1;
+ // Skip after last part of the escaped sequence - UnquoteChar stops on the last symbol of it
+ p0 := @QuotedChar;
+ p1 := p0 + 1;
end;
- 'l' : begin
+ 'l':
+ begin
Mode := smodeOneLower;
p1 := p0;
end;
- 'L' : begin
+ 'L':
+ begin
Mode := smodeAllLower;
p1 := p0;
end;
- 'u' : begin
+ 'u':
+ begin
Mode := smodeOneUpper;
p1 := p0;
end;
- 'U' : begin
+ 'U':
+ begin
Mode := smodeAllUpper;
p1 := p0;
end;
- else
- begin
- inc(p0);
- inc(p1);
- end;
+ else
+ begin
+ Inc(p0);
+ Inc(p1);
+ end;
end;
end
end;
- if p0 < p1 then begin
- while p0 < p1 do begin
+ if p0 < p1 then
+ begin
+ while p0 < p1 do
+ begin
case Mode of
- smodeOneLower, smodeAllLower:
+ smodeOneLower:
begin
- Ch := p0^;
- Ch := AnsiLowerCase(Ch)[1];
- ResultPtr^ := Ch;
- if Mode = smodeOneLower then
- Mode := smodeNormal;
+ ResultPtr^ := _LowerCase(p0^);
+ Mode := smodeNormal;
end;
- smodeOneUpper, smodeAllUpper:
+ smodeAllLower:
begin
- Ch := p0^;
- Ch := AnsiUpperCase(Ch)[1];
- ResultPtr^ := Ch;
- if Mode = smodeOneUpper then
- Mode := smodeNormal;
+ ResultPtr^ := _LowerCase(p0^);
end;
- else
- ResultPtr^ := p0^;
+ smodeOneUpper:
+ begin
+ ResultPtr^ := _UpperCase(p0^);
+ Mode := smodeNormal;
+ end;
+ smodeAllUpper:
+ begin
+ ResultPtr^ := _UpperCase(p0^);
+ end;
+ else
+ ResultPtr^ := p0^;
end;
- inc (ResultPtr);
- inc (p0);
+ Inc(ResultPtr);
+ Inc(p0);
end;
Mode := smodeNormal;
end;
end;
end; { of function TRegExpr.Substitute
---------------------------------------------------------------}
+ -------------------------------------------------------------- }
-procedure TRegExpr.Split (Const AInputStr : RegExprString; APieces : TStrings);
- var PrevPos : PtrInt;
- begin
+procedure TRegExpr.Split(const AInputStr: RegExprString; APieces: TStrings);
+var
+ PrevPos: PtrInt;
+begin
PrevPos := 1;
- if Exec (AInputStr) then
- REPEAT
- APieces.Add (System.Copy (AInputStr, PrevPos, MatchPos [0] - PrevPos));
- PrevPos := MatchPos [0] + MatchLen [0];
- UNTIL not ExecNext;
- APieces.Add (System.Copy (AInputStr, PrevPos, MaxInt)); // Tail
- end; { of procedure TRegExpr.Split
---------------------------------------------------------------}
-
-function TRegExpr.Replace (Const AInputStr : RegExprString; const AReplaceStr : RegExprString;
- AUseSubstitution : boolean{$IFDEF DefParam}= False{$ENDIF}) : RegExprString;
- var
- PrevPos : PtrInt;
- begin
+ if Exec(AInputStr) then
+ repeat
+ APieces.Add(System.Copy(AInputStr, PrevPos, MatchPos[0] - PrevPos));
+ PrevPos := MatchPos[0] + MatchLen[0];
+ until not ExecNext;
+ APieces.Add(System.Copy(AInputStr, PrevPos, MaxInt)); // Tail
+end; { of procedure TRegExpr.Split
+ -------------------------------------------------------------- }
+
+function TRegExpr.Replace(const AInputStr: RegExprString;
+ const AReplaceStr: RegExprString;
+ AUseSubstitution: boolean = False): RegExprString;
+var
+ PrevPos: PtrInt;
+begin
Result := '';
PrevPos := 1;
- if Exec (AInputStr) then
- REPEAT
- Result := Result + System.Copy (AInputStr, PrevPos,
- MatchPos [0] - PrevPos);
- if AUseSubstitution //###0.946
- then Result := Result + Substitute (AReplaceStr)
- else Result := Result + AReplaceStr;
- PrevPos := MatchPos [0] + MatchLen [0];
- UNTIL not ExecNext;
- Result := Result + System.Copy (AInputStr, PrevPos, MaxInt); // Tail
- end; { of function TRegExpr.Replace
---------------------------------------------------------------}
-
-function TRegExpr.ReplaceEx (Const AInputStr : RegExprString;
- AReplaceFunc : TRegExprReplaceFunction)
- : RegExprString;
- var
- PrevPos : PtrInt;
- begin
+ if Exec(AInputStr) then
+ repeat
+ Result := Result + System.Copy(AInputStr, PrevPos, MatchPos[0] - PrevPos);
+ if AUseSubstitution // ###0.946
+ then
+ Result := Result + Substitute(AReplaceStr)
+ else
+ Result := Result + AReplaceStr;
+ PrevPos := MatchPos[0] + MatchLen[0];
+ until not ExecNext;
+ Result := Result + System.Copy(AInputStr, PrevPos, MaxInt); // Tail
+end; { of function TRegExpr.Replace
+ -------------------------------------------------------------- }
+
+function TRegExpr.ReplaceEx(const AInputStr: RegExprString;
+ AReplaceFunc: TRegExprReplaceFunction): RegExprString;
+var
+ PrevPos: PtrInt;
+begin
Result := '';
PrevPos := 1;
- if Exec (AInputStr) then
- REPEAT
- Result := Result + System.Copy (AInputStr, PrevPos,
- MatchPos [0] - PrevPos)
- + AReplaceFunc (Self);
- PrevPos := MatchPos [0] + MatchLen [0];
- UNTIL not ExecNext;
- Result := Result + System.Copy (AInputStr, PrevPos, MaxInt); // Tail
- end; { of function TRegExpr.ReplaceEx
---------------------------------------------------------------}
+ if Exec(AInputStr) then
+ repeat
+ Result := Result + System.Copy(AInputStr, PrevPos, MatchPos[0] - PrevPos)
+ + AReplaceFunc(Self);
+ PrevPos := MatchPos[0] + MatchLen[0];
+ until not ExecNext;
+ Result := Result + System.Copy(AInputStr, PrevPos, MaxInt); // Tail
+end; { of function TRegExpr.ReplaceEx
+ -------------------------------------------------------------- }
+
+
+function TRegExpr.Replace(const AInputStr: RegExprString;
+ AReplaceFunc: TRegExprReplaceFunction): RegExprString;
+begin
+ Result := ReplaceEx(AInputStr, AReplaceFunc);
+end; { of function TRegExpr.Replace
+ -------------------------------------------------------------- }
+{ ============================================================= }
+{ ====================== Debug section ======================== }
+{ ============================================================= }
-{$IFDEF OverMeth}
-function TRegExpr.Replace (const AInputStr : RegExprString;
- AReplaceFunc : TRegExprReplaceFunction)
- : RegExprString;
- begin
- {$IFDEF FPC}Result:={$ENDIF}ReplaceEx (AInputStr, AReplaceFunc);
- end; { of function TRegExpr.Replace
+{$IFDEF UseFirstCharSet}
+procedure TRegExpr.FillFirstCharSet(prog: PRegExprChar);
+var
+ scan: PRegExprChar; // Current node.
+ Next: PRegExprChar; // Next node.
+ opnd: PRegExprChar;
+ Oper: TREOp;
+ ch: REChar;
+ min_cnt, i: integer;
+ TempSet: TRegExprCharset;
+begin
+ TempSet := [];
+ scan := prog;
+ while scan <> nil do
+ begin
+ Next := regnext(scan);
+ Oper := PREOp(scan)^;
+ case Oper of
+ OP_BSUBEXP,
+ OP_BSUBEXPCI:
+ begin
+ // we cannot optimize r.e. if it starts with back reference
+ FirstCharSet := RegExprAllSet; //###0.930
+ Exit;
+ end;
+ OP_BOL,
+ OP_BOLML:
+ ; // Exit; //###0.937
+ OP_EOL,
+ OP_EOLML:
+ begin //###0.948 was empty in 0.947, was EXIT in 0.937
+ Include(FirstCharSet, 0);
+ if ModifierM then
+ for i := 1 to Length(LineSeparators) do
+ Include(FirstCharSet, byte(LineSeparators[i]));
+ Exit;
+ end;
+ OP_BOUND,
+ OP_NOTBOUND:
+ ; //###0.943 ?!!
+ OP_ANY,
+ OP_ANYML:
+ begin // we can better define ANYML !!!
+ FirstCharSet := RegExprAllSet; //###0.930
+ Exit;
+ end;
+ OP_ANYDIGIT:
+ begin
+ FirstCharSet := FirstCharSet + RegExprDigitSet;
+ Exit;
+ end;
+ OP_NOTDIGIT:
+ begin
+ FirstCharSet := FirstCharSet + (RegExprAllSet - RegExprDigitSet);
+ Exit;
+ end;
+ OP_ANYLETTER:
+ begin
+ GetCharSetFromWordChars(TempSet);
+ FirstCharSet := FirstCharSet + TempSet;
+ Exit;
+ end;
+ OP_NOTLETTER:
+ begin
+ GetCharSetFromWordChars(TempSet);
+ FirstCharSet := FirstCharSet + (RegExprAllSet - TempSet);
+ Exit;
+ end;
+ OP_ANYSPACE:
+ begin
+ GetCharSetFromSpaceChars(TempSet);
+ FirstCharSet := FirstCharSet + TempSet;
+ Exit;
+ end;
+ OP_NOTSPACE:
+ begin
+ GetCharSetFromSpaceChars(TempSet);
+ FirstCharSet := FirstCharSet + (RegExprAllSet - TempSet);
+ Exit;
+ end;
+ OP_ANYVERTSEP:
+ begin
+ FirstCharSet := FirstCharSet + RegExprLineSeparatorsSet;
+ Exit;
+ end;
+ OP_NOTVERTSEP:
+ begin
+ FirstCharSet := FirstCharSet + (RegExprAllSet - RegExprLineSeparatorsSet);
+ Exit;
+ end;
+ OP_ANYHORZSEP:
+ begin
+ FirstCharSet := FirstCharSet + RegExprHorzSeparatorsSet;
+ Exit;
+ end;
+ OP_NOTHORZSEP:
+ begin
+ FirstCharSet := FirstCharSet + (RegExprAllSet - RegExprHorzSeparatorsSet);
+ Exit;
+ end;
+ OP_EXACTLYCI:
+ begin
+ ch := (scan + REOpSz + RENextOffSz + RENumberSz)^;
+ {$IFDEF UniCode}
+ if Ord(ch) <= $FF then
+ {$ENDIF}
+ begin
+ Include(FirstCharSet, byte(ch));
+ Include(FirstCharSet, byte(InvertCase(ch)));
+ end;
+ Exit;
+ end;
+ OP_EXACTLY:
+ begin
+ ch := (scan + REOpSz + RENextOffSz + RENumberSz)^;
+ {$IFDEF UniCode}
+ if Ord(ch) <= $FF then
+ {$ENDIF}
+ Include(FirstCharSet, byte(ch));
+ Exit;
+ end;
+ OP_ANYOF:
+ begin
+ GetCharSetFromCharClass(scan + REOpSz + RENextOffSz, False, TempSet);
+ FirstCharSet := FirstCharSet + TempSet;
+ Exit;
+ end;
+ OP_ANYBUT:
+ begin
+ GetCharSetFromCharClass(scan + REOpSz + RENextOffSz, False, TempSet);
+ FirstCharSet := FirstCharSet + (RegExprAllSet - TempSet);
+ Exit;
+ end;
+ OP_ANYOFCI:
+ begin
+ GetCharSetFromCharClass(scan + REOpSz + RENextOffSz, True, TempSet);
+ FirstCharSet := FirstCharSet + TempSet;
+ Exit;
+ end;
+ OP_ANYBUTCI:
+ begin
+ GetCharSetFromCharClass(scan + REOpSz + RENextOffSz, True, TempSet);
+ FirstCharSet := FirstCharSet + (RegExprAllSet - TempSet);
+ Exit;
+ end;
+ OP_NOTHING:
+ ;
+ OP_COMMENT:
+ ;
+ OP_BACK:
+ ;
+ Succ(OP_OPEN) .. TREOp(Ord(OP_OPEN) + NSUBEXP - 1):
+ begin //###0.929
+ FillFirstCharSet(Next);
+ Exit;
+ end;
+ Succ(OP_CLOSE) .. TREOp(Ord(OP_CLOSE) + NSUBEXP - 1):
+ begin //###0.929
+ FillFirstCharSet(Next);
+ Exit;
+ end;
+ OP_BRANCH:
+ begin
+ if (PREOp(Next)^ <> OP_BRANCH) // No choice.
+ then
+ Next := scan + REOpSz + RENextOffSz // Avoid recursion.
+ else
+ begin
+ repeat
+ FillFirstCharSet(scan + REOpSz + RENextOffSz);
+ scan := regnext(scan);
+ until (scan = nil) or (PREOp(scan)^ <> OP_BRANCH);
+ Exit;
+ end;
+ end;
+ {$IFDEF ComplexBraces}
+ OP_LOOPENTRY:
+ begin //###0.925
+ //LoopStack [LoopStackIdx] := 0; //###0.940 line removed
+ FillFirstCharSet(Next); // execute LOOP
+ Exit;
+ end;
+ OP_LOOP,
+ OP_LOOPNG:
+ begin //###0.940
+ opnd := scan + PRENextOff(AlignToPtr(scan + REOpSz + RENextOffSz + REBracesArgSz * 2))^;
+ min_cnt := PREBracesArg(AlignToPtr(scan + REOpSz + RENextOffSz))^;
+ FillFirstCharSet(opnd);
+ if min_cnt = 0 then
+ FillFirstCharSet(Next);
+ Exit;
+ end;
+ {$ENDIF}
+ OP_STAR,
+ OP_STARNG: //###0.940
+ FillFirstCharSet(scan + REOpSz + RENextOffSz);
+ OP_PLUS,
+ OP_PLUSNG:
+ begin //###0.940
+ FillFirstCharSet(scan + REOpSz + RENextOffSz);
+ Exit;
+ end;
+ OP_BRACES,
+ OP_BRACESNG:
+ begin //###0.940
+ opnd := scan + REOpSz + RENextOffSz + REBracesArgSz * 2;
+ min_cnt := PREBracesArg(AlignToPtr(scan + REOpSz + RENextOffSz))^; // BRACES
+ FillFirstCharSet(opnd);
+ if min_cnt > 0 then
+ Exit;
+ end;
+ OP_EEND:
+ begin
+ FirstCharSet := RegExprAllSet; //###0.948
+ Exit;
+ end;
+ else
+ begin
+ fLastErrorOpcode := Oper;
+ Error(reeUnknownOpcodeInFillFirst);
+ Exit;
+ end;
+ end; { of case scan^}
+ scan := Next;
+ end; { of while scan <> nil}
+end; { of procedure FillFirstCharSet
--------------------------------------------------------------}
{$ENDIF}
-{=============================================================}
-{====================== Debug section ========================}
-{=============================================================}
+procedure TRegExpr.InitCharCheckers;
+var
+ Cnt: integer;
+ //
+ function Add(AChecker: TRegExprCharChecker): byte;
+ begin
+ Inc(Cnt);
+ if Cnt > High(CharCheckers) then
+ raise Exception.Create('Too small CharCheckers array');
+ CharCheckers[Cnt - 1] := AChecker;
+ Result := Cnt - 1;
+ end;
+ //
+begin
+ Cnt := 0;
+ FillChar(CharCheckers, SizeOf(CharCheckers), 0);
+
+ CheckerIndex_Word := Add(CharChecker_Word);
+ CheckerIndex_NotWord := Add(CharChecker_NotWord);
+ CheckerIndex_Space := Add(CharChecker_Space);
+ CheckerIndex_NotSpace := Add(CharChecker_NotSpace);
+ CheckerIndex_Digit := Add(CharChecker_Digit);
+ CheckerIndex_NotDigit := Add(CharChecker_NotDigit);
+ CheckerIndex_VertSep := Add(CharChecker_VertSep);
+ CheckerIndex_NotVertSep := Add(CharChecker_NotVertSep);
+ CheckerIndex_HorzSep := Add(CharChecker_HorzSep);
+ CheckerIndex_NotHorzSep := Add(CharChecker_NotHorzSep);
+ //CheckerIndex_AllAZ := Add(CharChecker_AllAZ);
+ CheckerIndex_LowerAZ := Add(CharChecker_LowerAZ);
+ CheckerIndex_UpperAZ := Add(CharChecker_UpperAZ);
+
+ SetLength(CharCheckerInfos, 3);
+ with CharCheckerInfos[0] do
+ begin
+ CharBegin := 'a';
+ CharEnd:= 'z';
+ CheckerIndex := CheckerIndex_LowerAZ;
+ end;
+ with CharCheckerInfos[1] do
+ begin
+ CharBegin := 'A';
+ CharEnd := 'Z';
+ CheckerIndex := CheckerIndex_UpperAZ;
+ end;
+ with CharCheckerInfos[2] do
+ begin
+ CharBegin := '0';
+ CharEnd := '9';
+ CheckerIndex := CheckerIndex_Digit;
+ end;
+end;
+
+function TRegExpr.CharChecker_Word(ch: REChar): boolean;
+begin
+ Result := IsWordChar(ch);
+end;
+
+function TRegExpr.CharChecker_NotWord(ch: REChar): boolean;
+begin
+ Result := not IsWordChar(ch);
+end;
+
+function TRegExpr.CharChecker_Space(ch: REChar): boolean;
+begin
+ Result := IsSpaceChar(ch);
+end;
+
+function TRegExpr.CharChecker_NotSpace(ch: REChar): boolean;
+begin
+ Result := not IsSpaceChar(ch);
+end;
+
+function TRegExpr.CharChecker_Digit(ch: REChar): boolean;
+begin
+ Result := IsDigitChar(ch);
+end;
+
+function TRegExpr.CharChecker_NotDigit(ch: REChar): boolean;
+begin
+ Result := not IsDigitChar(ch);
+end;
+
+function TRegExpr.CharChecker_VertSep(ch: REChar): boolean;
+begin
+ Result := IsLineSeparator(ch);
+end;
+
+function TRegExpr.CharChecker_NotVertSep(ch: REChar): boolean;
+begin
+ Result := not IsLineSeparator(ch);
+end;
+
+function TRegExpr.CharChecker_HorzSep(ch: REChar): boolean;
+begin
+ Result := IsHorzSeparator(ch);
+end;
+
+function TRegExpr.CharChecker_NotHorzSep(ch: REChar): boolean;
+begin
+ Result := not IsHorzSeparator(ch);
+end;
+
+function TRegExpr.CharChecker_LowerAZ(ch: REChar): boolean;
+begin
+ case ch of
+ 'a' .. 'z':
+ Result := True;
+ else
+ Result := False;
+ end;
+end;
+
+function TRegExpr.CharChecker_UpperAZ(ch: REChar): boolean;
+begin
+ case ch of
+ 'A' .. 'Z':
+ Result := True;
+ else
+ Result := False;
+ end;
+end;
+
{$IFDEF RegExpPCodeDump}
-function TRegExpr.DumpOp (op : TREOp) : RegExprString;
+
+function TRegExpr.DumpOp(op: TREOp): RegExprString;
// printable representation of opcode
- begin
+begin
case op of
- BOL: Result := 'BOL';
- EOL: Result := 'EOL';
- BOLML: Result := 'BOLML';
- EOLML: Result := 'EOLML';
- BOUND: Result := 'BOUND'; //###0.943
- NOTBOUND: Result := 'NOTBOUND'; //###0.943
- ANY: Result := 'ANY';
- ANYML: Result := 'ANYML'; //###0.941
- ANYLETTER: Result := 'ANYLETTER';
- NOTLETTER: Result := 'NOTLETTER';
- ANYDIGIT: Result := 'ANYDIGIT';
- NOTDIGIT: Result := 'NOTDIGIT';
- ANYSPACE: Result := 'ANYSPACE';
- NOTSPACE: Result := 'NOTSPACE';
- ANYOF: Result := 'ANYOF';
- ANYBUT: Result := 'ANYBUT';
- ANYOFCI: Result := 'ANYOF/CI';
- ANYBUTCI: Result := 'ANYBUT/CI';
- BRANCH: Result := 'BRANCH';
- EXACTLY: Result := 'EXACTLY';
- EXACTLYCI: Result := 'EXACTLY/CI';
- NOTHING: Result := 'NOTHING';
- COMMENT: Result := 'COMMENT';
- BACK: Result := 'BACK';
- EEND: Result := 'END';
- BSUBEXP: Result := 'BSUBEXP';
- BSUBEXPCI: Result := 'BSUBEXP/CI';
- Succ (OPEN) .. TREOp (Ord (OPEN) + NSUBEXP - 1): //###0.929
- Result := Format ('OPEN[%d]', [ord (op) - ord (OPEN)]);
- Succ (CLOSE) .. TREOp (Ord (CLOSE) + NSUBEXP - 1): //###0.929
- Result := Format ('CLOSE[%d]', [ord (op) - ord (CLOSE)]);
- STAR: Result := 'STAR';
- PLUS: Result := 'PLUS';
- BRACES: Result := 'BRACES';
+ OP_BOL:
+ Result := 'BOL';
+ OP_EOL:
+ Result := 'EOL';
+ OP_BOLML:
+ Result := 'BOLML';
+ OP_EOLML:
+ Result := 'EOLML';
+ OP_BOUND:
+ Result := 'BOUND'; // ###0.943
+ OP_NOTBOUND:
+ Result := 'NOTBOUND'; // ###0.943
+ OP_ANY:
+ Result := 'ANY';
+ OP_ANYML:
+ Result := 'ANYML'; // ###0.941
+ OP_ANYLETTER:
+ Result := 'ANYLETTER';
+ OP_NOTLETTER:
+ Result := 'NOTLETTER';
+ OP_ANYDIGIT:
+ Result := 'ANYDIGIT';
+ OP_NOTDIGIT:
+ Result := 'NOTDIGIT';
+ OP_ANYSPACE:
+ Result := 'ANYSPACE';
+ OP_NOTSPACE:
+ Result := 'NOTSPACE';
+ OP_ANYHORZSEP:
+ Result := 'ANYHORZSEP';
+ OP_NOTHORZSEP:
+ Result := 'NOTHORZSEP';
+ OP_ANYVERTSEP:
+ Result := 'ANYVERTSEP';
+ OP_NOTVERTSEP:
+ Result := 'NOTVERTSEP';
+ OP_ANYOF:
+ Result := 'ANYOF';
+ OP_ANYBUT:
+ Result := 'ANYBUT';
+ OP_ANYOFCI:
+ Result := 'ANYOF/CI';
+ OP_ANYBUTCI:
+ Result := 'ANYBUT/CI';
+ OP_BRANCH:
+ Result := 'BRANCH';
+ OP_EXACTLY:
+ Result := 'EXACTLY';
+ OP_EXACTLYCI:
+ Result := 'EXACTLY/CI';
+ OP_NOTHING:
+ Result := 'NOTHING';
+ OP_COMMENT:
+ Result := 'COMMENT';
+ OP_BACK:
+ Result := 'BACK';
+ OP_EEND:
+ Result := 'END';
+ OP_BSUBEXP:
+ Result := 'BSUBEXP';
+ OP_BSUBEXPCI:
+ Result := 'BSUBEXP/CI';
+ Succ(OP_OPEN) .. TREOp(Ord(OP_OPEN) + NSUBEXP - 1): // ###0.929
+ Result := Format('OPEN[%d]', [Ord(op) - Ord(OP_OPEN)]);
+ Succ(OP_CLOSE) .. TREOp(Ord(OP_CLOSE) + NSUBEXP - 1): // ###0.929
+ Result := Format('CLOSE[%d]', [Ord(op) - Ord(OP_CLOSE)]);
+ OP_STAR:
+ Result := 'STAR';
+ OP_PLUS:
+ Result := 'PLUS';
+ OP_BRACES:
+ Result := 'BRACES';
{$IFDEF ComplexBraces}
- LOOPENTRY: Result := 'LOOPENTRY'; //###0.925
- LOOP: Result := 'LOOP'; //###0.925
- LOOPNG: Result := 'LOOPNG'; //###0.940
- {$ENDIF}
- ANYOFTINYSET: Result:= 'ANYOFTINYSET';
- ANYBUTTINYSET:Result:= 'ANYBUTTINYSET';
- {$IFDEF UseSetOfChar} //###0.929
- ANYOFFULLSET: Result:= 'ANYOFFULLSET';
+ OP_LOOPENTRY:
+ Result := 'LOOPENTRY'; // ###0.925
+ OP_LOOP:
+ Result := 'LOOP'; // ###0.925
+ OP_LOOPNG:
+ Result := 'LOOPNG'; // ###0.940
{$ENDIF}
- STARNG: Result := 'STARNG'; //###0.940
- PLUSNG: Result := 'PLUSNG'; //###0.940
- BRACESNG: Result := 'BRACESNG'; //###0.940
- else Error (reeDumpCorruptedOpcode);
- end; {of case op}
+ OP_STARNG:
+ Result := 'STARNG'; // ###0.940
+ OP_PLUSNG:
+ Result := 'PLUSNG'; // ###0.940
+ OP_BRACESNG:
+ Result := 'BRACESNG'; // ###0.940
+ else
+ Error(reeDumpCorruptedOpcode);
+ end; { of case op }
Result := ':' + Result;
- end; { of function TRegExpr.DumpOp
---------------------------------------------------------------}
+end; { of function TRegExpr.DumpOp
+ -------------------------------------------------------------- }
-function TRegExpr.Dump : RegExprString;
+function TRegExpr.Dump: RegExprString;
// dump a regexp in vaguely comprehensible form
- var
- s : PRegExprChar;
- op : TREOp; // Arbitrary non-END op.
- next : PRegExprChar;
- i : PtrInt;
- Diff : PtrInt;
-{$IFDEF UseSetOfChar} //###0.929
- Ch : REChar;
-{$ENDIF}
- function PrintableChar(AChar: REChar): string; inline;
- begin
- if AChar < ' '
- then Result := '#' + IntToStr (Ord (AChar))
- else Result := AChar;
- end;
- begin
- if not IsProgrammOk //###0.929
- then EXIT;
+var
+ s: PRegExprChar;
+ op: TREOp; // Arbitrary non-END op.
+ next: PRegExprChar;
+ i, NLen: integer;
+ Diff: PtrInt;
+ Ch: AnsiChar;
+
+ function PrintableChar(AChar: REChar): string; {$IFDEF InlineFuncs}inline;{$ENDIF}
+ begin
+ if AChar < ' ' then
+ Result := '#' + IntToStr(Ord(AChar))
+ else
+ Result := AChar;
+ end;
- op := EXACTLY;
+begin
+ if not IsProgrammOk then
+ Exit;
+
+ op := OP_EXACTLY;
Result := '';
s := programm + REOpSz;
- while op <> EEND do begin // While that wasn't END last time...
- op := s^;
- Result := Result + Format ('%2d%s', [s - programm, DumpOp (s^)]); // Where, what.
- next := regnext (s);
- if next = nil // Next ptr.
- then Result := Result + ' (0)'
- else begin
- if next > s //###0.948 PWideChar subtraction workaround (see comments in Tail method for details)
- then Diff := next - s
- else Diff := - (s - next);
- Result := Result + Format (' (%d) ', [(s - programm) + Diff]);
- end;
- inc (s, REOpSz + RENextOffSz);
- if (op = ANYOF) or (op = ANYOFCI) or (op = ANYBUT) or (op = ANYBUTCI)
- or (op = EXACTLY) or (op = EXACTLYCI) then begin
- // Literal string, where present.
- while s^ <> #0 do begin
- Result := Result + PrintableChar(s^);
- inc (s);
- end;
- inc (s);
- end;
- if (op = ANYOFTINYSET) or (op = ANYBUTTINYSET) then begin
- for i := 1 to TinySetLen do begin
- Result := Result + s^;
- inc (s);
+ while op <> OP_EEND do
+ begin // While that wasn't END last time...
+ op := s^;
+ Result := Result + Format('%2d%s', [s - programm, DumpOp(s^)]);
+ // Where, what.
+ next := regnext(s);
+ if next = nil // Next ptr.
+ then
+ Result := Result + ' (0)'
+ else
+ begin
+ if next > s
+ // ###0.948 PWideChar subtraction workaround (see comments in Tail method for details)
+ then
+ Diff := next - s
+ else
+ Diff := -(s - next);
+ Result := Result + Format(' (%d) ', [(s - programm) + Diff]);
+ end;
+ Inc(s, REOpSz + RENextOffSz);
+ if (op = OP_ANYOF) or (op = OP_ANYOFCI) or (op = OP_ANYBUT) or (op = OP_ANYBUTCI) then
+ begin
+ repeat
+ case s^ of
+ OpKind_End:
+ begin
+ Inc(s);
+ Break;
+ end;
+ OpKind_Range:
+ begin
+ Result := Result + 'Rng(';
+ Inc(s);
+ Result := Result + PrintableChar(s^) + '-';
+ Inc(s);
+ Result := Result + PrintableChar(s^);
+ Result := Result + ') ';
+ Inc(s);
+ end;
+ OpKind_MetaClass:
+ begin
+ Inc(s);
+ Result := Result + '\' + PrintableChar(s^) + ' ';
+ Inc(s);
+ end;
+ OpKind_Char:
+ begin
+ Inc(s);
+ NLen := PLongInt(s)^;
+ Inc(s, RENumberSz);
+ Result := Result + 'Ch(';
+ for i := 1 to NLen do
+ begin
+ Result := Result + PrintableChar(s^);
+ Inc(s);
+ end;
+ Result := Result + ') ';
+ end;
+ else
+ Error(reeDumpCorruptedOpcode);
end;
+ until false;
+ end;
+ if (op = OP_EXACTLY) or (op = OP_EXACTLYCI) then
+ begin
+ // Literal string, where present.
+ NLen := PLongInt(s)^;
+ Inc(s, RENumberSz);
+ for i := 1 to NLen do
+ begin
+ Result := Result + PrintableChar(s^);
+ Inc(s);
end;
- if (op = BSUBEXP) or (op = BSUBEXPCI) then begin
- Result := Result + ' \' + IntToStr (Ord (s^));
- inc (s);
- end;
- {$IFDEF UseSetOfChar} //###0.929
- if op = ANYOFFULLSET then begin
- for Ch := #0 to #255 do
- if Ch in PSetOfREChar (s)^ then
- Result := Result + PrintableChar(Ch);
- inc (s, SizeOf (TSetOfREChar));
- end;
- {$ENDIF}
- if (op = BRACES) or (op = BRACESNG) then begin //###0.941
- // show min/max argument of BRACES operator
- Result := Result + Format ('{%d,%d}', [PREBracesArg (AlignToInt(s))^, PREBracesArg (AlignToInt(s + REBracesArgSz))^]);
- inc (s, REBracesArgSz * 2);
- end;
- {$IFDEF ComplexBraces}
- if (op = LOOP) or (op = LOOPNG) then begin //###0.940
- Result := Result + Format (' -> (%d) {%d,%d}', [
- (s - programm - (REOpSz + RENextOffSz)) + PRENextOff (AlignToPtr(s + 2 * REBracesArgSz))^,
- PREBracesArg (AlignToInt(s))^, PREBracesArg (AlignToInt(s + REBracesArgSz))^]);
- inc (s, 2 * REBracesArgSz + RENextOffSz);
- end;
- {$ENDIF}
- Result := Result + #$d#$a;
- end; { of while}
+ end;
+ if (op = OP_BSUBEXP) or (op = OP_BSUBEXPCI) then
+ begin
+ Result := Result + ' \' + IntToStr(Ord(s^));
+ Inc(s);
+ end;
+ if (op = OP_BRACES) or (op = OP_BRACESNG) then
+ begin // ###0.941
+ // show min/max argument of braces operator
+ Result := Result + Format('{%d,%d}', [PREBracesArg(AlignToInt(s))^,
+ PREBracesArg(AlignToInt(s + REBracesArgSz))^]);
+ Inc(s, REBracesArgSz * 2);
+ end;
+ {$IFDEF ComplexBraces}
+ if (op = OP_LOOP) or (op = OP_LOOPNG) then
+ begin // ###0.940
+ Result := Result + Format(' -> (%d) {%d,%d}',
+ [(s - programm - (REOpSz + RENextOffSz)) +
+ PRENextOff(AlignToPtr(s + 2 * REBracesArgSz))^,
+ PREBracesArg(AlignToInt(s))^,
+ PREBracesArg(AlignToInt(s + REBracesArgSz))^]);
+ Inc(s, 2 * REBracesArgSz + RENextOffSz);
+ end;
+ {$ENDIF}
+ Result := Result + #$d#$a;
+ end; { of while }
// Header fields of interest.
-
- if regstart <> #0
- then Result := Result + 'start ' + regstart;
- if reganch <> #0
- then Result := Result + 'anchored ';
- if regmust <> nil
- then Result := Result + 'must have ' + regmust;
- {$IFDEF UseFirstCharSet} //###0.929
- Result := Result + #$d#$a'FirstCharSet:';
+ if reganchored <> #0 then
+ Result := Result + 'Anchored; ';
+ if regmustString <> '' then
+ Result := Result + 'Must have: "' + regmustString + '"; ';
+
+ {$IFDEF UseFirstCharSet} // ###0.929
+ Result := Result + #$d#$a'First charset: ';
+ if FirstCharSet = [] then
+ Result := Result + '<empty set>'
+ else
+ if FirstCharSet = RegExprAllSet then
+ Result := Result + '<all chars>'
+ else
for Ch := #0 to #255 do
- if Ch in FirstCharSet
- then begin
- if Ch < ' '
- then Result := Result + PrintableChar(Ch) //###0.948
- else Result := Result + Ch;
+ if byte(Ch) in FirstCharSet then
+ begin
+ if Ch < ' ' then
+ Result := Result + PrintableChar(Ch) // ###0.948
+ else
+ Result := Result + Ch;
end;
{$ENDIF}
Result := Result + #$d#$a;
- end; { of function TRegExpr.Dump
---------------------------------------------------------------}
+end; { of function TRegExpr.Dump
+ -------------------------------------------------------------- }
+{$ENDIF}
+
+{$IFDEF reRealExceptionAddr}
+{$OPTIMIZATION ON}
+// ReturnAddr works correctly only if compiler optimization is ON
+// I placed this method at very end of unit because there are no
+// way to restore compiler optimization flag ...
{$ENDIF}
-procedure TRegExpr.Error (AErrorID : integer);
- var
- e : ERegExpr;
- begin
+procedure TRegExpr.Error(AErrorID: integer);
+ {$IFDEF reRealExceptionAddr}
+ function ReturnAddr: Pointer; // ###0.938
+ asm
+ mov eax,[ebp+4]
+ end;
+ {$ENDIF}
+var
+ e: ERegExpr;
+begin
fLastError := AErrorID; // dummy stub - useless because will raise exception
if AErrorID < 1000 // compilation error ?
- then e := ERegExpr.Create (ErrorMsg (AErrorID) // yes - show error pos
- + ' (pos ' + IntToStr (CompilerErrorPos) + ')')
- else e := ERegExpr.Create (ErrorMsg (AErrorID));
+ then
+ e := ERegExpr.Create(ErrorMsg(AErrorID) // yes - show error pos
+ + ' (pos ' + IntToStr(CompilerErrorPos) + ')')
+ else
+ e := ERegExpr.Create(ErrorMsg(AErrorID));
e.ErrorCode := AErrorID;
e.CompilerErrorPos := CompilerErrorPos;
raise e
+ {$IFDEF reRealExceptionAddr}
+ at ReturnAddr; // ###0.938
+ {$ENDIF}
end; { of procedure TRegExpr.Error
---------------------------------------------------------------}
+ -------------------------------------------------------------- }
(*
PCode persistence:
- FirstCharSet
- programm, regsize
- regstart // -> programm
- reganch // -> programm
- regmust, regmlen // -> programm
- fExprIsCompiled
+ FirstCharSet
+ programm, regsize
+ reganchored // -> programm
+ regmust, regmustlen // -> programm
+ fExprIsCompiled
*)
-
+// be carefull - placed here code will be always compiled with
+// compiler optimization flag
initialization
- RegExprInvertCaseFunction := TRegExpr.InvertCaseFunction;
-end.
+ RegExprInvertCaseFunction := TRegExpr.InvertCaseFunction;
+end.