summary refs log tree commit diff
path: root/utils/unicode/unicodeset.pas
diff options
context:
space:
mode:
Diffstat (limited to 'utils/unicode/unicodeset.pas')
-rw-r--r-- utils/unicode/unicodeset.pas 91
1 files changed, 81 insertions, 10 deletions
diff --git a/utils/unicode/unicodeset.pas b/utils/unicode/unicodeset.pas
index 0be30432f4..974e8748e5 100644
--- a/utils/unicode/unicodeset.pas
+++ b/utils/unicode/unicodeset.pas
@@ -1,6 +1,6 @@
{ UnicodeSet implementation.
- Copyright (c) 2013 by Inoussa OUEDRAOGO
+ Copyright (c) 2013-2015 by Inoussa OUEDRAOGO
The source code is distributed under the Library GNU
General Public License with the following modification:
@@ -36,6 +36,8 @@ type
TUnicodeSet = class;
+ { TPatternParser }
+
TPatternParser = class
private
FBufferStr : UnicodeString;
@@ -43,6 +45,7 @@ type
FBufferLength : Integer;
FSet : TUnicodeSet;
FPosition : Integer;
+ FSpecialChar: Boolean;
private
procedure Error(const AMsg : string; const AArgs : array of const);overload;inline;
procedure Error(const AMsg : string);overload;inline;
@@ -58,6 +61,7 @@ type
function NextChar() : TUnicodeCodePoint;
procedure ParseItem();
procedure DoParse();
+ property SpecialChar : Boolean read FSpecialChar;
public
procedure Parse(const APattern : PUnicodeChar; const ALength : Integer);overload;
procedure Parse(const APattern : UnicodeString);overload;inline;
@@ -73,6 +77,8 @@ type
class function Compare(const A, B : TUnicodeCodePointArray) : Integer;static;inline;
end;
+ { TUnicodeSet }
+
TUnicodeSet = class
private type
TItem = TUnicodeCodePointArray;
@@ -84,18 +90,22 @@ type
FParser : TPatternParser;
private
procedure CreateParser();inline;
+ function InternalContains(const AString : UnicodeString) : Boolean;overload;
public
constructor Create();
destructor Destroy;override;
procedure Add(AChar : TUnicodeCodePoint);inline;overload;
procedure Add(AString : TUnicodeCodePointArray);inline;overload;
procedure AddRange(const AStart, AEnd : TUnicodeCodePoint);inline;
- procedure AddPattern(const APattern : UnicodeString);inline;
+ procedure AddPattern(const APattern : UnicodeString);inline;overload;
+ procedure AddPattern(const APattern : RawByteString);inline;overload;
function CreateIterator() : TIterator;
function Contains(const AString : array of TUnicodeCodePoint) : Boolean;overload;
function Contains(const AChar : TUnicodeCodePoint) : Boolean;inline;overload;
function Contains(const AChar : UnicodeChar) : Boolean;inline;overload;
function Contains(const AChar : AnsiChar) : Boolean;inline;overload;
+ function Contains(const AString : UnicodeString) : Boolean;overload;
+ function Contains(const AString : RawByteString) : Boolean;overload;
end;
resourcestring
@@ -267,7 +277,7 @@ begin
CheckEOF(4);
s := Copy(FBufferStr,(FPosition+1),4);
Inc(FPosition,4);
- if not TryStrToInt('$'+s,i) then
+ if not TryStrToInt(string('$'+s),i) then
Error(SExpectedBut,['\uXXXX',s]);
cp := i;
end;
@@ -281,6 +291,7 @@ begin
Inc(FPosition);
end;
end;
+ FSpecialChar := (cp = Ord('{')) or (cp = Ord('}'));
Result := cp;
end;
@@ -292,7 +303,8 @@ end;
procedure TPatternParser.ParseItem();
var
cp, lastCp : TUnicodeCodePoint;
- charCount : Integer;
+ charCount, k : Integer;
+ cpa : TUnicodeCodePointArray;
begin
SkipSpaces();
Expect('[');
@@ -304,13 +316,31 @@ begin
cp := NextChar();
if CompareTo(cp,']') then
Break;
- if CompareTo(cp,'-') then begin
- if (charCount = 0) then
- Error(SExpectedBut,['<char>','-']);
- cp := NextChar();
- FSet.AddRange(lastCp,cp);
+ if SpecialChar and (cp = Ord('{')) then begin
+ SetLength(cpa,12);
+ k := 0;
+ while True do begin
+ cp := NextChar();
+ if SpecialChar and (cp = Ord('}')) then
+ break;
+ if (k >= Length(cpa)) then
+ SetLength(cpa,(2*k));
+ cpa[k] := cp;
+ k := k+1;
+ end;
+ if (k > 0) then begin
+ SetLength(cpa,k);
+ FSet.Add(cpa);
+ end;
end else begin
- FSet.Add(cp);
+ if CompareTo(cp,'-') then begin
+ if (charCount = 0) then
+ Error(SExpectedBut,['<char>','-']);
+ cp := NextChar();
+ FSet.AddRange(lastCp,cp);
+ end else begin
+ FSet.Add(cp);
+ end;
end;
Inc(charCount);
end;
@@ -348,6 +378,22 @@ begin
end;
end;
+function TUnicodeSet.InternalContains(const AString: UnicodeString): Boolean; // membership test performed on the UCS4 code points of AString
+var
+  codePoints : UCS4String;
+  count, idx : Integer;
+  sequence : TUnicodeCodePointArray;
+begin
+  codePoints := UnicodeStringToUCS4String(AString);
+  count := High(codePoints); // last slot of a UCS4String is its terminating 0, so it is not counted
+  if (count = 1) then
+    exit(Contains(codePoints[0])); // exactly one code point: defer to the single-value overload
+  SetLength(sequence,count);
+  for idx := Low(sequence) to High(sequence) do
+    sequence[idx] := codePoints[idx];
+  Result := Contains(sequence); // otherwise look the whole sequence up as one item
+end;
+
constructor TUnicodeSet.Create;
begin
FTree := TTree.Create();
@@ -387,6 +433,14 @@ begin
FParser.Parse(APattern);
end;
+procedure TUnicodeSet.AddPattern(const APattern: RawByteString); // byte-string overload: converts and forwards
+var
+  widePattern : UnicodeString;
+begin
+  widePattern := UnicodeString(APattern); // convert once so the UnicodeString overload does the parsing
+  AddPattern(widePattern);
+end;
+
function TUnicodeSet.CreateIterator() : TIterator;
begin
Result := FTree.CreateForwardIterator();
@@ -422,5 +476,22 @@ begin
Result := Contains(TUnicodeCodePoint(Ord(AChar)));
end;
+function TUnicodeSet.Contains(const AString: UnicodeString): Boolean; // dispatches on the UTF-16 length of AString
+begin
+  case Length(AString) of
+    0 : Result := Contains([]); // empty string: query the empty code point sequence
+    1 : Result := Contains(AString[1]); // single UTF-16 unit: UnicodeChar overload
+    else Result := InternalContains(AString); // two or more units: convert to code points first
+  end;
+end;
+
+function TUnicodeSet.Contains(const AString: RawByteString): Boolean; // byte-string overload: converts and forwards
+var
+  wideText : UnicodeString;
+begin
+  wideText := UnicodeString(AString); // convert once, then reuse the UnicodeString overload
+  Result := Contains(wideText);
+end;
+
end.