summaryrefslogtreecommitdiff
path: root/utils
diff options
context:
space:
mode:
authormarco <marco@3ad0048d-3df7-0310-abae-a5850022a9f2>2016-05-24 20:05:14 +0000
committermarco <marco@3ad0048d-3df7-0310-abae-a5850022a9f2>2016-05-24 20:05:14 +0000
commit683dae6a0e5c2a0bd0169501cfddcb2af1223bb5 (patch)
tree42dea2789ef188994b4f04f8bec5d540b653c05e /utils
parent86fa73d7722e895417c9e99cd6dab852c30e48a6 (diff)
downloadfpc-683dae6a0e5c2a0bd0169501cfddcb2af1223bb5.tar.gz
# revisions: 33613,33629,33632,33708,33710,33728,33729,33730,33734,33742
git-svn-id: http://svn.freepascal.org/svn/fpc/branches/fixes_3_0@33786 3ad0048d-3df7-0310-abae-a5850022a9f2
Diffstat (limited to 'utils')
-rw-r--r--utils/unicode/cldrhelper.pas631
-rw-r--r--utils/unicode/cldrparser.lpr99
-rw-r--r--utils/unicode/cldrtest.pas2283
-rw-r--r--utils/unicode/cldrtxt.pas687
-rw-r--r--utils/unicode/cldrxml.pas538
-rw-r--r--utils/unicode/grbtree.pas4
-rw-r--r--utils/unicode/helper.pas219
-rw-r--r--utils/unicode/unicodeset.pas91
8 files changed, 4403 insertions, 149 deletions
diff --git a/utils/unicode/cldrhelper.pas b/utils/unicode/cldrhelper.pas
index a1495244f5..1ae155cba4 100644
--- a/utils/unicode/cldrhelper.pas
+++ b/utils/unicode/cldrhelper.pas
@@ -1,6 +1,6 @@
{ CLDR collation helper unit.
- Copyright (c) 2013 by Inoussa OUEDRAOGO
+ Copyright (c) 2013-2015 by Inoussa OUEDRAOGO
The source code is distributed under the Library GNU
General Public License with the following modification:
@@ -47,9 +47,6 @@ type
TUCA_LineRecArray = array of TUCA_LineRec;
-
-//----------------------------------------------------
-
ECldrException = class(Exception)
end;
@@ -66,12 +63,23 @@ type
FirstNonIgnorable, LastNonIgnorable,
FirstTrailing, LastTrailing
);
+const
+ FixableReorderLogicalSet = [
+ TReorderLogicalReset.LastRegular,TReorderLogicalReset.FirstTrailing,
+ TReorderLogicalReset.LastTrailing
+ ];
+
+type
TCollationField = (BackWard, VariableLowLimit, VariableHighLimit);
TCollationFields = set of TCollationField;
+ PReorderUnit = ^TReorderUnit;
+
{ TReorderUnit }
TReorderUnit = X_PACKED record
+ private
+ FVirtualPosition : TReorderLogicalReset;
public
Context : TUnicodeCodePointArray;
ExpansionChars : TUnicodeCodePointArray;
@@ -80,6 +88,9 @@ type
InitialPosition : Integer;
Changed : Boolean;
public
+ property VirtualPosition : TReorderLogicalReset read FVirtualPosition;
+ function IsVirtual() : Boolean;inline;
+ public
class function From(
const AChars,
AContext : array of TUnicodeCodePoint;
@@ -102,15 +113,19 @@ type
const AWeigthKind : TReorderWeigthKind;
const AInitialPosition : Integer
) : TReorderUnit;static;overload;
+ class function From(
+ const AReset : TReorderLogicalReset
+ ) : TReorderUnit;static;overload;
procedure SetExpansion(const AChars : array of TUnicodeCodePoint);
procedure SetExpansion(const AChar : TUnicodeCodePoint);
procedure Clear();
- procedure Assign(const AItem : TReorderUnit);
+ procedure Assign(const AItem : PReorderUnit);
function HasContext() : Boolean;
function IsExpansion() : Boolean;
end;
- PReorderUnit = ^TReorderUnit;
+
+ PReorderSequence = ^TReorderSequence;
{ TReorderSequence }
@@ -122,8 +137,9 @@ type
Before : Boolean;
public
procedure Clear();
+ procedure SetElementCount(const ALength : Integer);
+ procedure Assign(ASource : PReorderSequence);
end;
- PReorderSequence = ^TReorderSequence;
TReorderSequenceArray = array of TReorderSequence;
{ TOrderedCharacters }
@@ -150,28 +166,68 @@ type
end;
POrderedCharacters = ^TOrderedCharacters;
+ { TCldrImport }
+
+ TCldrImport = class
+ private
+ FSource: string;
+ FTypeName: string;
+ public
+ property Source : string read FSource;
+ property TypeName : string read FTypeName;
+ end;
+
+ { TCldrImportList }
+
+ TCldrImportList = class
+ private
+ FItems : array of TCldrImport;
+ private
+ function GetCount: Integer;
+ function GetItem(AIndex : Integer): TCldrImport;
+ public
+ destructor Destroy();override;
+ procedure Clear();
+ function IndexOf(const ASource, AType : string) : Integer;
+ function Find(const ASource, AType : string) : TCldrImport;
+ function Add(const ASource, AType : string) : TCldrImport;
+ property Count : Integer read GetCount;
+ property Item[AIndex : Integer] : TCldrImport read GetItem;default;
+ end;
+
TCldrCollation = class;
{ TCldrCollationItem }
TCldrCollationItem = class
private
+ FAlt: string;
FBackwards: Boolean;
FBase: string;
FChangedFields: TCollationFields;
+ FImports: TCldrImportList;
FParent: TCldrCollation;
FRules: TReorderSequenceArray;
FTypeName: string;
public
+ constructor Create();
+ destructor Destroy;override;
procedure Clear();
+ function IsPrivate() : Boolean;
property Parent : TCldrCollation read FParent;
property TypeName : string read FTypeName write FTypeName;
+ property Alt : string read FAlt write FAlt;
property Base : string read FBase write FBase;
property Backwards : Boolean read FBackwards write FBackwards;
property Rules : TReorderSequenceArray read FRules write FRules;
property ChangedFields : TCollationFields read FChangedFields write FChangedFields;
+ property Imports : TCldrImportList read FImports;
end;
+ TCldrParserMode = (HeaderParsing, FullParsing);
+
+ TCldrCollationRepository = class;
+
{ TCldrCollation }
TCldrCollation = class
@@ -181,24 +237,81 @@ type
FDefaultType: string;
FVersion: string;
FLanguage: string;
+ FMode: TCldrParserMode;
+ FRepository: TCldrCollationRepository;
private
function GetItem(Index : Integer): TCldrCollationItem;
function GetItemCount: Integer;
public
destructor Destroy();override;
procedure Clear();
- function IndexOf(const AItemName : string) : Integer;
- function Find(const AItemName : string) : TCldrCollationItem;
+ function IndexOf(const AItemName : string) : Integer;overload;
+ function IndexOf(const AItemName, AItemAlt : string) : Integer;overload;
+ function Find(const AItemName : string) : TCldrCollationItem;overload;
+ function Find(const AItemName, AItemAlt : string) : TCldrCollationItem;overload;
function Add(AItem : TCldrCollationItem) : Integer;
+ function FindPublicItemCount() : Integer;
property Language : string read FLanguage write FLanguage;
property LocalID : string read FLocalID write FLocalID;
property Version : string read FVersion write FVersion;
property DefaultType : string read FDefaultType write FDefaultType;
property ItemCount : Integer read GetItemCount;
property Items[Index : Integer] : TCldrCollationItem read GetItem;
+ property Mode : TCldrParserMode read FMode write FMode;
+ property Repository : TCldrCollationRepository read FRepository;
end;
- TCldrParserMode = (HeaderParsing, FullParsing);
+ ICldrCollationLoader = interface
+ ['{117AAC84-06CE-4EC8-9B07-4E81EC23930C}']
+ procedure LoadCollation(
+ const ALanguage : string;
+ ACollation : TCldrCollation;
+ AMode : TCldrParserMode
+ );
+ procedure LoadCollationType(
+ const ALanguage,
+ ATypeName : string;
+ AType : TCldrCollationItem
+ );
+ end;
+
+ { TCldrCollationRepository }
+
+ TCldrCollationRepository = class
+ private
+ FItems : array of TCldrCollation;
+ FLoader: ICldrCollationLoader;
+ private
+ function GetItem(const AIndex : Integer): TCldrCollation;
+ function GetItemCount: Integer;
+ function IndexOfItem(AItem : TCldrCollation) : Integer;
+ procedure Add(AItem : TCldrCollation);
+ public
+ constructor Create(ALoader : ICldrCollationLoader);
+ destructor Destroy;override;
+ procedure FreeItems();
+ procedure Clear();
+ procedure SetLoader(AValue : ICldrCollationLoader);
+ function IndexOf(const ALanguage : string) : Integer;
+ function Find(const ALanguage : string) : TCldrCollation;
+ function Load(const ALanguage : string; const AMode : TCldrParserMode) : TCldrCollation;
+ property ItemCount : Integer read GetItemCount;
+ property Items[const AIndex : Integer] : TCldrCollation read GetItem;
+ property Loader : ICldrCollationLoader read FLoader;
+ end;
+
+ TRuleVisiterFunction =
+ function(
+ ARule : PReorderSequence;
+ AOwner : TCldrCollationItem;
+ AData : Pointer
+ ) : Boolean;
+
+ function ForEachRule(
+ ACollationType : TCldrCollationItem;
+ AVisitFunc : TRuleVisiterFunction;
+ ACustomData : Pointer
+ ) : Boolean;
function ComputeWeigths(
const AData : PReorderUnit;
@@ -240,10 +353,47 @@ type
const ASequenceLength : Integer
) : Integer;
+ function TryStrToLogicalReorder(
+ const AValue : string;
+ out AResult : TReorderLogicalReset
+ ) : Boolean;
+
+
+resourcestring
+ sCaseNothandled = 'This case is not handled : "%s", Position = %d.';
+ sCodePointExpected = 'Code Point node expected as child at this position "%d".';
+ sCollationsExistsAlready = 'This collation already exists : "%s"';
+ sCollationsNodeNotFound = '"collations" node not found.';
+ sCollationTypeNotFound = 'collation "Type" not found : "%s".';
+ sHexAttributeExpected = '"hex" attribute expected at this position "%d".';
+ sInvalidResetClause = 'Invalid "Reset" clause.';
+ sNodeNameAssertMessage = 'Expected NodeName "%s", got "%s".';
+ sRulesNodeNotFound = '"rules" node not found.';
+ sTextNodeChildExpected = '(Child) text node expected at this position "%d", but got "%s".';
+ sUniqueChildNodeExpected = 'Unique child node expected at this position "%d".';
+ sUnknownResetLogicalPosition = 'Unknown reset logical position : "%s".';
+ sVirtualIsReadOnly = 'Virtual logical "Reset" items are read only.';
+
implementation
uses
RtlConsts, typinfo;
+function TryStrToLogicalReorder(
+ const AValue : string;
+ out AResult : TReorderLogicalReset
+) : Boolean;
+var
+ s : string;
+ i : Integer;
+begin
+ s := StringReplace(AValue,' ','',[rfReplaceAll]);
+ s := StringReplace(s,'_','',[rfReplaceAll]);
+ i := GetEnumValue(TypeInfo(TReorderLogicalReset),s);
+ Result := (i > -1);
+ if Result then
+ AResult := TReorderLogicalReset(i);
+end;
+
function ToStr(const ACharacters : array of TUnicodeCodePoint): string;
var
i : Integer;
@@ -457,6 +607,11 @@ begin
end;
if (APosition = TReorderLogicalReset.LastNonIgnorable) then
exit(c);
+ for i := 0 to c - 1 do begin
+ if (p^.VirtualPosition = APosition) then
+ exit(i);
+ Inc(p);
+ end;
end;
procedure ApplyStatementToSequence(
@@ -491,6 +646,12 @@ var
end else begin
if (kr = 0) then
exit(0);
+ pd := @ASequence.Data[kr];
+ if pd^.IsVirtual() and (pd^.VirtualPosition in FixableReorderLogicalSet) then begin
+ kr := kr-1;
+ if (kr = 0) then
+ exit;
+ end;
kk := kr;
pd := @ASequence.Data[kk];
if (pd^.WeigthKind = TReorderWeigthKind.Primary) then begin
@@ -518,8 +679,8 @@ begin
pst := AStatement;
for h := 0 to AStatementCount - 1 do begin
locResetPos := -1;
- if (AStatement^.LogicalPosition > TReorderLogicalReset.None) then
- locResetPos := FindLogicalPos(@ASequence,AStatement^.LogicalPosition)
+ if (pst^.LogicalPosition > TReorderLogicalReset.None) then
+ locResetPos := FindLogicalPos(@ASequence,pst^.LogicalPosition)
else if (Length(pst^.Reset) > 0) then begin
locResetPos := IndexOf(pst^.Reset,[],@ASequence.Data[0],ASequence.ActualLength);
{if (locResetPos = -1) then
@@ -527,6 +688,15 @@ begin
if (locResetPos = -1) then
locResetPos := ASequence.ActualLength;
end;
+ if (pst^.LogicalPosition in FixableReorderLogicalSet) then begin
+ if (locResetPos < 0) or
+ (locResetPos >= ASequence.ActualLength) or
+ not(ASequence.Data[locResetPos].VirtualPosition in FixableReorderLogicalSet)
+ then begin
+ locResetPos := ASequence.Append(TReorderUnit.From(pst^.LogicalPosition));
+ end;
+ end;
+
pse := @pst^.Elements[0];
kr := locResetPos;
k := GetNextInsertPos();
@@ -718,6 +888,47 @@ begin
end;
end;
+function IsIgnorable(AWeight : TUCA_WeightRecArray) : Boolean;
+var
+ i : Integer;
+begin
+ if (Length(AWeight) = 0) then
+ exit(True);
+ for i := Low(AWeight) to High(AWeight) do begin
+ if (AWeight[i].Weights[0] <> 0) or
+ (AWeight[i].Weights[1] <> 0) or
+ (AWeight[i].Weights[2] <> 0)
+ then begin
+ exit(False);
+ end;
+ end;
+ Result := True;
+end;
+
+function RemoveIgnorables(
+ AItem : TUnicodeCodePointArray;
+ const AList : PUCA_LineRec;
+ const AListLen : Integer
+) : TUnicodeCodePointArray;
+var
+ i, c, k : Integer;
+begin
+ SetLength(Result,Length(AItem));
+ c := 0;
+ for i := 0 to Length(AItem) - 1 do begin
+ k := IndexOf([AItem[i]],AList,AListLen);
+ if (k >= 0) and
+ IsIgnorable(AList[k].Weights)
+ then
+ k := -1;
+ if (k >= 0) then begin
+ Result[c] := AItem[i];
+ c := c+1;
+ end;
+ end;
+ SetLength(Result,c);
+end;
+
function Compress(
const AData : TUCA_LineRecArray;
out AResult : TUCA_LineRecArray
@@ -806,7 +1017,7 @@ begin
q := AData;
p := AData;
for i := 0 to ADataLen - 1 do begin
- if p^.Changed then begin
+ if not(p^.IsVirtual()) and p^.Changed then begin
suffixChar := p^.Characters[0];
for k := 0 to ADataLen - 1 do begin
if not(q[k].Changed) and (q[k].Characters[0] = suffixChar) then begin
@@ -821,7 +1032,7 @@ end;
{$include weight_derivation.inc}
-function ComputeWeigths(
+function InternalComputeWeigths(
const AData : PReorderUnit;
const ADataLen : Integer;
const ADataWeigths : TUCA_LineRecArray;
@@ -1116,6 +1327,41 @@ begin
Result := Length(AResult);
end;
+function ComputeWeigths(
+ const AData : PReorderUnit;
+ const ADataLen : Integer;
+ const ADataWeigths : TUCA_LineRecArray;
+ out AResult : TUCA_LineRecArray
+) : Integer;
+var
+ locData : array of TReorderUnit;
+ i, actualLength : Integer;
+ p : PReorderUnit;
+begin
+ SetLength(locData,ADataLen);
+ actualLength := 0;
+ p := AData;
+ for i := 0 to ADataLen-1 do begin
+ if not p^.IsVirtual() then begin
+ locData[actualLength].Assign(p);
+ actualLength := actualLength+1;
+ end;
+ Inc(p);
+ end;
+ if (Length(locData) <> actualLength) then
+ SetLength(locData,actualLength);
+ Result := InternalComputeWeigths(@locData[0],actualLength,ADataWeigths,AResult);
+
+ p := AData;
+ for i := 0 to actualLength-1 do begin
+ while p^.IsVirtual() do begin
+ Inc(p);
+ end;
+ p^.Assign(@locData[i]);
+ Inc(p);
+ end;
+end;
+
function FillInitialPositions(
AData : PReorderUnit;
const ADataLen : Integer;
@@ -1125,6 +1371,7 @@ var
locNotFound, i, cw : Integer;
p : PReorderUnit;
pw : PUCA_LineRec;
+ chars : TUnicodeCodePointArray;
begin
locNotFound := 0;
cw := Length(ADataWeigths);
@@ -1135,6 +1382,10 @@ begin
p := AData;
for i := 0 to ADataLen - 1 do begin
p^.InitialPosition := IndexOf(p^.Characters,pw,cw) + 1;
+ if (p^.InitialPosition = 0) then begin
+ chars := RemoveIgnorables(p^.Characters,pw,cw);
+ p^.InitialPosition := IndexOf(chars,pw,cw) + 1;
+ end;
if (p^.InitialPosition = 0) then
Inc(locNotFound);
Inc(p);
@@ -1142,8 +1393,212 @@ begin
Result := locNotFound;
end;
+{ TCldrImportList }
+
+function TCldrImportList.GetCount: Integer;
+begin
+ Result := Length(FItems);
+end;
+
+function TCldrImportList.GetItem(AIndex : Integer): TCldrImport;
+begin
+ if (AIndex < 0) or (AIndex >= Length(FItems)) then
+ raise ERangeError.CreateFmt(SListIndexError,[AIndex]);
+ Result := FItems[AIndex];
+end;
+
+destructor TCldrImportList.Destroy();
+begin
+ Clear();
+ inherited;
+end;
+
+procedure TCldrImportList.Clear();
+var
+ i : Integer;
+begin
+ for i := Low(FItems) to High(FItems) do
+ FreeAndNil(FItems[i]);
+ SetLength(FItems,0);
+end;
+
+function TCldrImportList.IndexOf(const ASource, AType: string): Integer;
+var
+ i : Integer;
+begin
+ for i := Low(FItems) to High(FItems) do begin
+ if (FItems[i].Source = ASource) and (FItems[i].TypeName = AType) then begin
+ Result := i;
+ exit;
+ end;
+ end;
+ Result := -1;
+end;
+
+function TCldrImportList.Find(const ASource, AType: string): TCldrImport;
+var
+ i : Integer;
+begin
+ i := IndexOf(ASource,AType);
+ if (i >= 0) then
+ Result := FItems[i]
+ else
+ Result := nil;
+end;
+
+function TCldrImportList.Add(const ASource, AType: string): TCldrImport;
+var
+ i : Integer;
+begin
+ i := IndexOf(ASource,AType);
+ if (i >= 0) then begin
+ Result := FItems[i];
+ end else begin
+ Result := TCldrImport.Create();
+ Result.FSource := ASource;
+ Result.FTypeName := AType;
+ i := Length(FItems);
+ SetLength(FItems,(i+1));
+ FItems[i] := Result;
+ end;
+end;
+
+{ TCldrCollationRepository }
+
+function TCldrCollationRepository.GetItem(const AIndex : Integer): TCldrCollation;
+begin
+ if (AIndex < 0) or (AIndex >= Length(FItems)) then
+ raise ERangeError.CreateFmt(SListIndexError,[AIndex]);
+ Result := FItems[AIndex];
+end;
+
+function TCldrCollationRepository.GetItemCount: Integer;
+begin
+ Result := Length(FItems);
+end;
+
+function TCldrCollationRepository.IndexOfItem(AItem: TCldrCollation): Integer;
+var
+ i : Integer;
+begin
+ for i := Low(FItems) to High(FItems) do begin
+ if (FItems[i] = AItem) then begin
+ Result := i;
+ exit;
+ end;
+ end;
+ Result := -1;
+end;
+
+procedure TCldrCollationRepository.Add(AItem: TCldrCollation);
+var
+ i : Integer;
+begin
+ if (AItem = nil) then
+ raise EArgumentException.CreateFmt(SParamIsNil,['AItem: TCldrCollation']);
+ if (IndexOfItem(AItem) >= 0) then
+ raise EArgumentException.CreateFmt(sCollationsExistsAlready,[AItem.Language]);
+ i := Length(FItems);
+ SetLength(FItems,(i+1));
+ AItem.FRepository := Self;
+ FItems[i] := AItem;
+end;
+
+constructor TCldrCollationRepository.Create(ALoader: ICldrCollationLoader);
+begin
+ if (ALoader = nil) then
+ raise EArgumentException.CreateFmt(SInvalidPropertyElement,['Loader']);
+ SetLoader(ALoader);
+end;
+
+destructor TCldrCollationRepository.Destroy;
+begin
+ Clear();
+ inherited Destroy;
+end;
+
+procedure TCldrCollationRepository.FreeItems();
+var
+ i : Integer;
+begin
+ for i := 0 to Length(FItems) - 1 do
+ FreeAndNil(FItems[i]);
+ SetLength(FItems,0);
+end;
+
+procedure TCldrCollationRepository.Clear();
+begin
+ FreeItems();
+end;
+
+procedure TCldrCollationRepository.SetLoader(AValue: ICldrCollationLoader);
+begin
+ if (FLoader <> AValue) then
+ FLoader := AValue;
+end;
+
+function TCldrCollationRepository.IndexOf(const ALanguage: string): Integer;
+var
+ i : Integer;
+begin
+ for i := Low(FItems) to High(FItems) do begin
+ if (FItems[i].Language = ALanguage) then begin
+ Result := i;
+ exit;
+ end
+ end;
+ Result := -1;
+end;
+
+function TCldrCollationRepository.Find(const ALanguage: string): TCldrCollation;
+var
+ i : Integer;
+begin
+ i := IndexOf(ALanguage);
+ if (i >= 0) then
+ Result := FItems[i]
+ else
+ Result := nil;
+end;
+
+function TCldrCollationRepository.Load(
+ const ALanguage : string;
+ const AMode : TCldrParserMode
+) : TCldrCollation;
+var
+ isnew : Boolean;
+begin
+ Result := Find(ALanguage);
+ if (Result <> nil) then begin
+ if (Result.Mode = TCldrParserMode.FullParsing) or (Result.Mode = AMode) then
+ exit;
+ end;
+ isnew := (Result = nil);
+ if isnew then
+ Result := TCldrCollation.Create();
+ try
+ Loader.LoadCollation(ALanguage,Result,AMode);
+ Add(Result);
+ except
+ if isnew then
+ FreeAndNil(Result);
+ raise;
+ end;
+end;
+
{ TCldrCollationItem }
+constructor TCldrCollationItem.Create;
+begin
+ FImports := TCldrImportList.Create();
+end;
+
+destructor TCldrCollationItem.Destroy;
+begin
+ FImports.Free();
+ inherited Destroy;
+end;
+
procedure TCldrCollationItem.Clear();
begin
FBackwards := False;
@@ -1151,6 +1606,12 @@ begin
FChangedFields := [];
SetLength(FRules,0);
FTypeName := '';
+ FImports.Clear();
+end;
+
+function TCldrCollationItem.IsPrivate() : Boolean;
+begin
+ Result := (Pos('private-',TypeName) = 1);
end;
{ TCldrCollation }
@@ -1195,6 +1656,20 @@ begin
Result := -1;
end;
+function TCldrCollation.IndexOf(const AItemName, AItemAlt: string): Integer;
+var
+ i : Integer;
+begin
+ for i := 0 to ItemCount - 1 do begin
+ if SameText(AItemName,Items[i].TypeName) and
+ SameText(AItemAlt,Items[i].Alt)
+ then begin
+ exit(i);
+ end;
+ end;
+ Result := -1;
+end;
+
function TCldrCollation.Find(const AItemName: string): TCldrCollationItem;
var
i : Integer;
@@ -1206,6 +1681,17 @@ begin
Result := Items[i];
end;
+function TCldrCollation.Find(const AItemName, AItemAlt: string): TCldrCollationItem;
+var
+ i : Integer;
+begin
+ i := IndexOf(AItemName,AItemAlt);
+ if (i = - 1) then
+ Result := nil
+ else
+ Result := Items[i];
+end;
+
function TCldrCollation.Add(AItem: TCldrCollationItem): Integer;
begin
Result := Length(FItems);
@@ -1214,6 +1700,18 @@ begin
AItem.FParent := Self;
end;
+function TCldrCollation.FindPublicItemCount() : Integer;
+var
+ r, i : Integer;
+begin
+ r := 0;
+ for i := 0 to ItemCount-1 do begin
+ if not Items[i].IsPrivate() then
+ r := r+1;
+ end;
+ Result := r;
+end;
+
{ TReorderSequence }
procedure TReorderSequence.Clear();
@@ -1224,8 +1722,34 @@ begin
Before := False;
end;
+procedure TReorderSequence.SetElementCount(const ALength: Integer);
+begin
+ SetLength(Elements,ALength);
+end;
+
+procedure TReorderSequence.Assign(ASource: PReorderSequence);
+var
+ c, i : Integer;
+begin
+ if (ASource = nil) then begin
+ Self.Clear();
+ exit;
+ end;
+ Self.Reset := Copy(ASource^.Reset);
+ c := Length(ASource^.Elements);
+ SetLength(Self.Elements,c);
+ for i := 0 to c-1 do
+ Self.Elements[i].Assign(@ASource^.Elements[i]);
+ Self.Before := ASource^.Before;
+end;
+
{ TReorderUnit }
+function TReorderUnit.IsVirtual() : Boolean;
+begin
+ Result := (FVirtualPosition > TReorderLogicalReset.None);
+end;
+
class function TReorderUnit.From(
const AChars,
AContext : array of TUnicodeCodePoint;
@@ -1235,6 +1759,7 @@ class function TReorderUnit.From(
var
c : Integer;
begin
+ Result.Clear();
c := Length(AChars);
SetLength(Result.Characters,c);
if (c > 0) then
@@ -1276,10 +1801,19 @@ begin
Result := From([AChar],AContext,AWeigthKind,AInitialPosition);
end;
+class function TReorderUnit.From(const AReset: TReorderLogicalReset): TReorderUnit;
+begin
+ Result.Clear();
+ Result.FVirtualPosition := AReset;
+end;
+
procedure TReorderUnit.SetExpansion(const AChars: array of TUnicodeCodePoint);
var
c : Integer;
begin
+ if IsVirtual() then
+ raise ECldrException.Create(sVirtualIsReadOnly);
+
c := Length(AChars);
SetLength(ExpansionChars,c);
if (c > 0) then
@@ -1288,11 +1822,15 @@ end;
procedure TReorderUnit.SetExpansion(const AChar: TUnicodeCodePoint);
begin
+ if IsVirtual() then
+ raise ECldrException.Create(sVirtualIsReadOnly);
+
SetExpansion([AChar]);
end;
procedure TReorderUnit.Clear();
begin
+ Self.FVirtualPosition := TReorderLogicalReset(0);
Self.Characters := nil;
Self.Context := nil;
Self.ExpansionChars := nil;
@@ -1301,16 +1839,19 @@ begin
Self.Changed := False;
end;
-procedure TReorderUnit.Assign(const AItem : TReorderUnit);
+procedure TReorderUnit.Assign(const AItem : PReorderUnit);
begin
Clear();
- Self.Characters := Copy(AItem.Characters);
- //SetLength(Self.Context,Length(AItem.Context));
- Self.Context := Copy(AItem.Context);
- Self.ExpansionChars := Copy(AItem.ExpansionChars);
- Self.WeigthKind := AItem.WeigthKind;
- Self.InitialPosition := AItem.InitialPosition;
- Self.Changed := AItem.Changed;
+ if (AItem <> nil) then begin
+ Self.FVirtualPosition := AItem^.VirtualPosition;
+ Self.Characters := Copy(AItem^.Characters);
+ //SetLength(Self.Context,Length(AItem^.Context));
+ Self.Context := Copy(AItem^.Context);
+ Self.ExpansionChars := Copy(AItem^.ExpansionChars);
+ Self.WeigthKind := AItem^.WeigthKind;
+ Self.InitialPosition := AItem^.InitialPosition;
+ Self.Changed := AItem^.Changed;
+ end;
end;
function TReorderUnit.HasContext() : Boolean;
@@ -1365,7 +1906,7 @@ begin
Result.Clear();
SetLength(Result.Data,Self.ActualLength);
for i := 0 to Length(Result.Data) - 1 do
- Result.Data[i].Assign(Self.Data[i]);
+ Result.Data[i].Assign(@Self.Data[i]);
Result.FActualLength := Self.FActualLength;
end;
@@ -1381,13 +1922,13 @@ begin
if (ActualLength=0) then begin
EnsureSize(ActualLength + 1);
p := @Data[0];
- p^.Assign(AItem);
+ p^.Assign(@AItem);
p^.Changed := True;
exit(0);
end;
k := IndexOf(AItem.Characters,AItem.Context,@Data[0],ActualLength);
if (k = ADestPos) then begin
- Data[ADestPos].Assign(AItem);
+ Data[ADestPos].Assign(@AItem);
Data[ADestPos].Changed := True;
exit(k);
end;
@@ -1399,7 +1940,7 @@ begin
Data[c].Clear();
p := @Data[finalPos];
if (finalPos = ActualLength) then begin
- p^.Assign(AItem);
+ p^.Assign(@AItem);
p^.Changed := True;
end else begin
if (c > 0) then begin
@@ -1416,7 +1957,7 @@ begin
(ActualLength-(finalPos+1))*SizeOf(TReorderUnit)
);}
FillChar(Pointer(p)^,SizeOf(TReorderUnit),0);
- p^.Assign(AItem);
+ p^.Assign(@AItem);
p^.Changed := True;
end;
if (k >= 0) then begin
@@ -1636,6 +2177,42 @@ begin
CompareProps(ASource^.Items, PUCA_PropItemRec(@y[0]),ASource^.ItemSize);
end;
+function ForEachRule(
+ ACollationType : TCldrCollationItem;
+ AVisitFunc : TRuleVisiterFunction;
+ ACustomData : Pointer
+) : Boolean;
+var
+ i : Integer;
+ locImport : TCldrImport;
+ locRep : TCldrCollationRepository;
+ locCollation : TCldrCollation;
+ locType : TCldrCollationItem;
+ locRules : TReorderSequenceArray;
+begin
+ Result := False;
+ if not Assigned(AVisitFunc) then
+ exit;
+ if (ACollationType.Imports.Count > 0) then begin
+ locRep := ACollationType.Parent.Repository;
+ for i := 0 to ACollationType.Imports.Count-1 do begin
+ locImport := ACollationType.Imports[i];
+ locCollation := locRep.Load(locImport.Source,TCldrParserMode.FullParsing);
+ locType := locCollation.Find(locImport.TypeName);
+ if (locType = nil) then
+ raise ECldrException.CreateFmt(sCollationTypeNotFound,[locImport.TypeName]);
+ if not ForEachRule(locType,AVisitFunc,ACustomData) then
+ exit;
+ end;
+ end;
+ locRules := ACollationType.Rules;
+ for i := Low(locRules) to High(locRules) do begin
+ if not AVisitFunc(@locRules[i],ACollationType,ACustomData) then
+ exit;
+ end;
+ Result := True;
+end;
+
procedure GenerateCdlrCollation(
ACollation : TCldrCollation;
AItemName : string;
diff --git a/utils/unicode/cldrparser.lpr b/utils/unicode/cldrparser.lpr
index a8d98796c1..d4030de3f0 100644
--- a/utils/unicode/cldrparser.lpr
+++ b/utils/unicode/cldrparser.lpr
@@ -1,5 +1,5 @@
{ Unicode CLDR's collation parser.
- Copyright (c) 2013 by Inoussa OUEDRAOGO
+ Copyright (c) 2013-2015 by Inoussa OUEDRAOGO
It creates units from CLDR's collation files.
@@ -23,16 +23,18 @@ program cldrparser;
{$mode objfpc}{$H+}
{ $define WINCE_TEST}
+{$TYPEDADDRESS ON}
uses
SysUtils, classes, getopts,{$ifdef WINCE}StreamIO,{$endif}
- cldrhelper, helper, cldrtest, cldrxml, unicodeset;
+ cldrhelper, helper, cldrtest, cldrxml, unicodeset, cldrtxt;
const
+ SROOT_RULES_FILE = 'UCA_Rules_SHORT.txt';
SUsageText =
'This program creates pascal units from CLDR''s collation files for usage ' + sLineBreak +
'with the FreePascal Native Unicode Manager.' + sLineBreak + sLineBreak +
- 'Usage : cldrparser <collationFileName> [<typeName>] [-d<dataDir>] [-o<outputDir>] [-t]' + sLineBreak + sLineBreak +
+ 'Usage : cldrparser <collationFileName> [<typeName>] [-a<alt>] [-d<dataDir>] [-o<outputDir>] [-t<HaltOnFail>]' + sLineBreak + sLineBreak +
' where :' + sLineBreak +
' ' + sLineBreak +
' - collationFileName : specify the target file.' + sLineBreak +
@@ -43,24 +45,30 @@ const
' * the type named "standard" ' + sLineBreak +
' * the type named "search" ' + sLineBreak +
' * the first type.' + sLineBreak +
+ ' - a : this provides the "alt" property to select specific "type".' + sLineBreak +
' - dataDir : specify the directory that contains the collation files.' + sLineBreak +
' The default value is the program''s directory.' + sLineBreak +
' - outputDir : specify the directory where the generated files will be stored.' + sLineBreak +
' The default value is the program''s directory.' + sLineBreak +
' - t : to execute parser the test suite. The program will execute only the test suite and exit.' + sLineBreak +
+ ' <HaltOnFail> may be one of (y, Y, t, T, 1) to halt the execution on the first failing.' + sLineBreak +
' ' + sLineBreak +
' The program expects some files to be present in the <dataDir> folder : ' + sLineBreak +
- ' - UCA_Rules_SHORT.xml found in the CollationAuxiliary.zip available on unicode.org' + sLineBreak +
- ' - allkeys.txt this is the file allkeys_CLDR.txt contained in CollationAuxiliary.zip renamed to allkeys.txt' + sLineBreak +
- ' The CollationAuxiliary.zip archive is provided by unicode in the "unicode collation algorithm data files" section.';
+ ' - UCA_Rules_SHORT.xml ' + sLineBreak +
+ ' - allkeys.txt this is the file allkeys_CLDR.txt renamed to allkeys.txt' + sLineBreak +
+ ' These files are in the core.zip file of the CLDR release files. The CLDR''version used should be synchronized the' + sLineBreak +
+ ' version of the Unicode version used, for example for Uniocde 7 it will be CLDR 26.' + sLineBreak +
+ ' The CLDR files are provided by the Unicode Consortium at http://cldr.unicode.org/index/downloads';
function ParseOptions(
var ADataDir,
AOuputDir,
ACollationFileName,
- ACollationTypeName : string;
- var AExecTestSuite : Boolean
+ ACollationTypeName,
+ ACollationTypeAlt : string;
+ var AExecTestSuite,
+ ATestHaltOnFail : Boolean
) : Boolean;
var
c : Char;
@@ -78,8 +86,9 @@ begin
Result := True;
AExecTestSuite := False;
repeat
- c := GetOpt('d:o:ht');
+ c := GetOpt('a:d:o:ht:');
case c of
+ 'a' : ACollationTypeAlt := Trim(OptArg);
'd' : ADataDir := ExpandFileName(Trim(OptArg));
'o' : AOuputDir := ExpandFileName(Trim(OptArg));
'h', '?' :
@@ -87,7 +96,12 @@ begin
WriteLn(SUsageText);
Result := False;
end;
- 't' : AExecTestSuite := True;
+ 't' :
+ begin
+ AExecTestSuite := True;
+ s := Trim(OptArg);
+ ATestHaltOnFail := (s <> '') and CharInSet(s[1],['y','Y','t','T','1']);
+ end;
end;
until (c = EndOfOptions);
idx := 0;
@@ -109,12 +123,12 @@ var
orderedChars : TOrderedCharacters;
ucaBook : TUCA_DataBook;
stream, streamNE, streamOE, binaryStreamNE, binaryStreamOE : TMemoryStream;
- s, collationFileName, collationTypeName : string;
+ s, collationFileName, collationTypeName, collationTypeAlt : string;
i , c: Integer;
collation : TCldrCollation;
dataPath, outputPath : string;
collationItem : TCldrCollationItem;
- testSuiteFlag : Boolean;
+ testSuiteFlag, testSuiteHaltOnFailFlag : Boolean;
{$ifdef WINCE}
fs : TFileStream;
{$endif WINCE}
@@ -147,11 +161,20 @@ begin
outputPath := '';
collationFileName := '';
collationTypeName := '';
+ collationTypeAlt := '';
testSuiteFlag := False;
- if not ParseOptions(dataPath,outputPath,collationFileName,collationTypeName,testSuiteFlag) then
+ testSuiteHaltOnFailFlag := True;
+ if not ParseOptions(
+ dataPath,outputPath,collationFileName,collationTypeName,
+ collationTypeAlt,testSuiteFlag,testSuiteHaltOnFailFlag
+ )
+ then begin
+ WriteLn(SUsageText);
Halt(1);
+ end;
if testSuiteFlag then begin
- exec_tests();
+ WriteLn('Executing the test suite ...');
+ exec_tests(testSuiteHaltOnFailFlag);
Halt;
end;
if (dataPath <> '') and not(DirectoryExists(dataPath)) then begin
@@ -173,11 +196,11 @@ begin
end;
{$endif WINCE_TEST}
if not(
- FileExists(dataPath+'UCA_Rules_SHORT.xml') and
+ FileExists(dataPath+SROOT_RULES_FILE) and
FileExists(dataPath+'allkeys.txt')
)
then begin
- WriteLn(Format('File not found : %s or %s.',[dataPath+'UCA_Rules_SHORT.xml',dataPath+'allkeys.txt']));
+ WriteLn(Format('File not found : %s or %s.',[dataPath+SROOT_RULES_FILE,dataPath+'allkeys.txt']));
Halt(1);
end;
@@ -195,22 +218,35 @@ begin
binaryStreamOE := nil;
collation := TCldrCollation.Create();
try
- ParseCollationDocument(collationFileName,collation,TCldrParserMode.HeaderParsing);
- WriteLn(Format(' Collation Count = %d',[collation.ItemCount]));
- if (collation.ItemCount = 0) then begin
+ ParseCollationDocument2(collationFileName,collation,TCldrParserMode.HeaderParsing);
+ WriteLn(Format(' Collation Count = %d',[collation.FindPublicItemCount()]));
+ if (collation.FindPublicItemCount() = 0) then begin
WriteLn('No collation in this file.');
end else begin
- for i := 0 to collation.ItemCount - 1 do
- WriteLn(Format(' Item[%d] = (Type = %s)',[i, collation.Items[i].TypeName]));
- collationItem := collation.Find(collationTypeName);
+ for i := 0 to collation.ItemCount - 1 do begin
+ if not collation.Items[i].IsPrivate() then begin
+ s := collation.Items[i].TypeName;
+ if (collation.Items[i].Alt <> '') then
+ s := s + ', Alt = ' + collation.Items[i].Alt;
+ WriteLn(Format(' Item[%d] = (Type = %s)',[i,s]));
+ end;
+ end;
+ if (collationTypeAlt = '') then
+ collationItem := collation.Find(collationTypeName)
+ else
+ collationItem := collation.Find(collationTypeName,collationTypeAlt);
if (collationItem = nil) then begin
collationTypeName := FindCollationDefaultItemName(collation);
collationItem := collation.Find(collationTypeName);
+ collationTypeAlt := collationItem.Alt;
end;
- WriteLn(Format('Parsing Collation Item "%s" ...',[collationTypeName]));
- ParseCollationDocument(collationFileName,collationItem,collationTypeName);
+ s := collationTypeName;
+ if (collationTypeAlt <> '') then
+ s := Format('%s (%s)',[s,collationTypeAlt]);
+ WriteLn(Format('Parsing Collation Item "%s" ...',[s]));
+ ParseCollationDocument2(collationFileName,collationItem,collationTypeName);
- s := dataPath + 'UCA_Rules_SHORT.xml';
+ s := dataPath + SROOT_RULES_FILE;
WriteLn;
WriteLn('Parsing ',QuotedStr(s),' ...');
FillByte(orderedChars,SizeOf(orderedChars),0);
@@ -223,6 +259,9 @@ begin
s := dataPath + 'allkeys.txt';
stream.LoadFromFile(s);
ParseUCAFile(stream,ucaBook);
+ //WriteLn(' LEVEL-2''s items Value = ',CalcMaxLevel2Value(ucaBook.Lines));
+ //RewriteLevel2Values(@ucaBook.Lines[0],Length(ucaBook.Lines));
+ //WriteLn(' LEVEL-2''s items Value (after rewrite) = ',CalcMaxLevel2Value(ucaBook.Lines));
c := FillInitialPositions(@orderedChars.Data[0],orderedChars.ActualLength,ucaBook.Lines);
if (c > 0) then
WriteLn(' Missed Initial Positions = ',c);
@@ -240,18 +279,18 @@ begin
binaryStreamNE,binaryStreamOE,
orderedChars,ucaBook.Lines
);
- stream.SaveToFile(ExtractFilePath(collationFileName)+s);
+ stream.SaveToFile(outputPath+s);
if (streamNE.Size > 0) then begin
- streamNE.SaveToFile(ExtractFilePath(collationFileName)+GenerateEndianIncludeFileName(s,ENDIAN_NATIVE));
- streamOE.SaveToFile(ExtractFilePath(collationFileName)+GenerateEndianIncludeFileName(s,ENDIAN_NON_NATIVE));
+ streamNE.SaveToFile(outputPath+GenerateEndianIncludeFileName(s,ENDIAN_NATIVE));
+ streamOE.SaveToFile(outputPath+GenerateEndianIncludeFileName(s,ENDIAN_NON_NATIVE));
end;
if (binaryStreamNE.Size > 0) then begin
binaryStreamNE.SaveToFile(
- ExtractFilePath(collationFileName) +
+ outputPath +
ChangeFileExt(s,Format('_%s.bco',[ENDIAN_SUFFIX[ENDIAN_NATIVE]]))
);
binaryStreamOE.SaveToFile(
- ExtractFilePath(collationFileName) +
+ outputPath +
ChangeFileExt(s,Format('_%s.bco',[ENDIAN_SUFFIX[ENDIAN_NON_NATIVE]]))
);
end;
diff --git a/utils/unicode/cldrtest.pas b/utils/unicode/cldrtest.pas
index b5b74e545a..6f0abcbbb9 100644
--- a/utils/unicode/cldrtest.pas
+++ b/utils/unicode/cldrtest.pas
@@ -1,6 +1,6 @@
{ CLDR collation Algorithm test routines.
- Copyright (c) 2013 by Inoussa OUEDRAOGO
+ Copyright (c) 2013-2015 by Inoussa OUEDRAOGO
The source code is distributed under the Library GNU
General Public License with the following modification:
@@ -29,7 +29,7 @@ interface
uses
Classes, SysUtils,
- helper, cldrhelper, unicodedata;
+ unicodeset, helper, cldrhelper, unicodedata, cldrtxt, cldrxml;
function ToAnsiChars(const AValue : array of TUnicodeCodePoint) : string;
function DumpSequenceAnsi(const ASequence : TOrderedCharacters) : string;
@@ -42,7 +42,7 @@ uses
function ToWeight(const APrimary, ASecondary, ATertiary : Cardinal) : TUCA_WeightRecArray;overload;
function ToWeight(const AWeigths : array of Cardinal) : TUCA_WeightRecArray;overload;
- procedure exec_tests();
+ procedure exec_tests(const APropagateException : Boolean = True);
procedure test1();
procedure test2();
@@ -59,41 +59,363 @@ uses
procedure test13();
procedure test14();
procedure test15();
+ procedure test16a();
+ procedure test16b();
+ procedure test16c();
+ procedure test16d();
+ procedure test16e();
+
+ procedure test_parser_1();
+ procedure test_parser_2();
+ procedure test_parser_3();
+ procedure test_parser_4();
+ procedure test_parser_5();
+ procedure test_parser_6();
+ procedure test_parser_7();
+ procedure test_parser_8();
+ procedure test_parser_9();
+ procedure test_parser_abreviating_1();
+ procedure test_parser_abreviating_2();
+ procedure test_parser_abreviating_3();
+ procedure test_parser_abreviating_4();
+ procedure test_parser_abreviating_5();
+ procedure test_parser_abreviating_6();
+ procedure test_parser_abreviating_7();
+ procedure test_parser_abreviating_8();
+ procedure test_parser_abreviating_9();
+ procedure test_parser_abreviating_10();
+ procedure test_parser_contraction_1();
+ procedure test_parser_contraction_2();
+ procedure test_parser_expansion_1();
+ procedure test_parser_special_char_1();
+ procedure test_parser_special_char_2();
+ procedure test_parser_special_char_3();
+ procedure test_parser_special_char_4();
+ procedure test_parser_special_char_5();
+ procedure test_parser_special_char_6();
+ procedure test_parser_special_char_7();
+ procedure test_parser_skip_comment_1();
+ procedure test_parser_skip_comment_2();
+ procedure test_parser_skip_comment_3();
+ procedure test_parser_quoted_string_1();
+ procedure test_parser_quoted_string_2();
+ procedure test_parser_quoted_string_3();
+ procedure test_parser_quoted_string_4();
+ procedure test_parser_quoted_string_5();
+ procedure test_parser_quoted_string_6();
+ procedure test_parser_quoted_string_7();
+ procedure test_parser_quoted_string_8();
+ procedure test_parser_contexte_before_1();
+ procedure test_parser_contexte_before_2();
+ procedure test_parser_contexte_before_3();
+ procedure test_parser_contexte_before_4();
+ procedure test_parser_placement_before_1();
+ procedure test_parser_placement_before_2();
+ procedure test_parser_placement_before_3();
+ procedure test_parser_multi_unit_statement_line_1();
+ procedure test_parser_multi_unit_statement_line_2();
+ procedure test_parser_multi_unit_statement_line_3();
+ procedure test_parser_multi_statement_line_1();
+ procedure test_parser_multi_statement_line_2();
+ procedure test_parser_multi_statement_line_3();
+ procedure test_parser_multi_statement_line_4();
+
+ procedure test_parser_multi_line_statements_1();
+
+ procedure test_collation_parser_HeaderParsing();
+ procedure test_collation_parser_HeaderParsing_2();
+ procedure test_collation_parser_FullParsing();
+ procedure test_collation_parser_FullParsing_2();
+ procedure test_collation_parser_complete_rules();
+ procedure test_collation_parser_complete_rules_2();
+
+ procedure test_unicode_set_1();
+ procedure test_unicode_set_2();
+ procedure test_unicode_set_3();
implementation
+uses
+ typinfo;
+
+procedure do_exec_test(ATest : TProcedure; const APropagateException : Boolean);
+begin
+ if APropagateException then begin
+ ATest();
+ end else begin
+ try
+ ATest();
+ except
+ on e : Exception do begin
+ writeln(e.Message);
+ end;
+ end;
+ end;
+end;
-procedure exec_tests();
+procedure exec_utils_tests(const APropagateException : Boolean);
begin
+ WriteLn;WriteLn;WriteLn;WriteLn;
+ WriteLn('UTILITIES TESTS - START');
+ WriteLn('***************************** TEST UNICODESET 1 ******************');
+ do_exec_test(@test_unicode_set_1,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST UNICODESET 2 ******************');
+ do_exec_test(@test_unicode_set_2,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST UNICODESET 3 ******************');
+ do_exec_test(@test_unicode_set_3,APropagateException);
+ WriteLn;
+ WriteLn('UTILITIES TESTS - START');
+ WriteLn;
+ WriteLn;
+end;
+
+procedure exec_parser_tests(const APropagateException : Boolean);
+begin
+ WriteLn;WriteLn;WriteLn;WriteLn;
+ WriteLn('PARSER TESTS');
+ WriteLn('***************************** TEST PARSER 1 ******************');
+ do_exec_test(@test_parser_1,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST PARSER 2 ******************');
+ do_exec_test(@test_parser_2,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST PARSER 3 ******************');
+ do_exec_test(@test_parser_3,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST PARSER 4 ******************');
+ do_exec_test(@test_parser_4,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST PARSER 5 ******************');
+ do_exec_test(@test_parser_5,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST PARSER 6 ******************');
+ do_exec_test(@test_parser_6,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST PARSER 7 ******************');
+ do_exec_test(@test_parser_7,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST PARSER 8 ******************');
+ do_exec_test(@test_parser_7,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST PARSER 9 ******************');
+ do_exec_test(@test_parser_9,APropagateException);
+ WriteLn;
+ WriteLn;
+ WriteLn('***************************** TEST ABREVIATING 1 ******************');
+ do_exec_test(@test_parser_abreviating_1,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST ABREVIATING 2 ******************');
+ do_exec_test(@test_parser_abreviating_2,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST ABREVIATING 3 ******************');
+ do_exec_test(@test_parser_abreviating_3,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST ABREVIATING 4 ******************');
+ do_exec_test(@test_parser_abreviating_4,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST ABREVIATING 5 ******************');
+ do_exec_test(@test_parser_abreviating_5,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST ABREVIATING 6 ******************');
+ do_exec_test(@test_parser_abreviating_6,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST ABREVIATING 7 ******************');
+ do_exec_test(@test_parser_abreviating_7,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST ABREVIATING 8 ******************');
+ do_exec_test(@test_parser_abreviating_8,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST ABREVIATING 9 ******************');
+ do_exec_test(@test_parser_abreviating_9,APropagateException);
+ WriteLn;
+ WriteLn;
+ WriteLn('***************************** TEST ABREVIATING 10 ******************');
+ do_exec_test(@test_parser_abreviating_10,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST CONTRACTION 1 ******************');
+ do_exec_test(@test_parser_contraction_1,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST CONTRACTION 2 ******************');
+ do_exec_test(@test_parser_contraction_2,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST EXPANSION 1 ******************');
+ do_exec_test(@test_parser_expansion_1,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST SPECIAL CHAR 1 ******************');
+ do_exec_test(@test_parser_special_char_1,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST SPECIAL CHAR 2 ******************');
+ do_exec_test(@test_parser_special_char_2,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST SPECIAL CHAR 3 ******************');
+ do_exec_test(@test_parser_special_char_3,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST SPECIAL CHAR 4 ******************');
+ do_exec_test(@test_parser_special_char_4,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST SPECIAL CHAR 5 ******************');
+ do_exec_test(@test_parser_special_char_5,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST SPECIAL CHAR 6 ******************');
+ do_exec_test(@test_parser_special_char_6,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST SPECIAL CHAR 7 ******************');
+ do_exec_test(@test_parser_special_char_7,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST SKIP COMMENT 1 ******************');
+ do_exec_test(@test_parser_skip_comment_1,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST SKIP COMMENT 2 ******************');
+ do_exec_test(@test_parser_skip_comment_2,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST SKIP COMMENT 3 ******************');
+ do_exec_test(@test_parser_skip_comment_3,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST QUOTED STRING 1 ******************');
+ do_exec_test(@test_parser_quoted_string_1,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST QUOTED STRING 2 ******************');
+ do_exec_test(@test_parser_quoted_string_2,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST QUOTED STRING 3 ******************');
+ do_exec_test(@test_parser_quoted_string_3,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST QUOTED STRING 4 ******************');
+ do_exec_test(@test_parser_quoted_string_4,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST QUOTED STRING 5 ******************');
+ do_exec_test(@test_parser_quoted_string_5,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST QUOTED STRING 6 ******************');
+ do_exec_test(@test_parser_quoted_string_6,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST QUOTED STRING 7 ******************');
+ do_exec_test(@test_parser_quoted_string_7,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST QUOTED STRING 8 ******************');
+ do_exec_test(@test_parser_quoted_string_8,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST CONTEXT BEFORE 1 ******************');
+ do_exec_test(@test_parser_contexte_before_1,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST CONTEXT BEFORE 2 ******************');
+ do_exec_test(@test_parser_contexte_before_2,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST CONTEXT BEFORE 3 ******************');
+ do_exec_test(@test_parser_contexte_before_3,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST CONTEXT BEFORE 4 ******************');
+ do_exec_test(@test_parser_contexte_before_4,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST PLACEMENT BEFORE 1 ******************');
+ do_exec_test(@test_parser_placement_before_1,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST PLACEMENT BEFORE 2 ******************');
+ do_exec_test(@test_parser_placement_before_2,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST PLACEMENT BEFORE 3 ******************');
+ do_exec_test(@test_parser_placement_before_3,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST MULTI UNIT STATEMENT LINE 1 ******************');
+ do_exec_test(@test_parser_multi_unit_statement_line_1,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST MULTI UNIT STATEMENT LINE 2 ******************');
+ do_exec_test(@test_parser_multi_unit_statement_line_2,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST MULTI UNIT STATEMENT LINE 3 ******************');
+ do_exec_test(@test_parser_multi_unit_statement_line_3,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST MULTI STATEMENT LINE 1 ******************');
+ do_exec_test(@test_parser_multi_statement_line_1,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST MULTI STATEMENT LINE 2 ******************');
+ do_exec_test(@test_parser_multi_statement_line_2,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST MULTI STATEMENT LINE 3 ******************');
+ do_exec_test(@test_parser_multi_statement_line_3,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST MULTI STATEMENT LINE 4 ******************');
+ do_exec_test(@test_parser_multi_statement_line_4,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST MULTI LINE STATEMENTS 1 ******************');
+ do_exec_test(@test_parser_multi_line_statements_1,APropagateException);
+ WriteLn;
+ WriteLn;
+ WriteLn('***************************** TEST REPOSITORY 1 ******************');
+ do_exec_test(@test_collation_parser_HeaderParsing,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST REPOSITORY 2 ******************');
+ do_exec_test(@test_collation_parser_FullParsing,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST REPOSITORY 3 ******************');
+ do_exec_test(@test_collation_parser_complete_rules,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST REPOSITORY 4 ******************');
+ do_exec_test(@test_collation_parser_HeaderParsing_2,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST REPOSITORY 5 ******************');
+ do_exec_test(@test_collation_parser_FullParsing_2,APropagateException);
+ WriteLn;
+ WriteLn('***************************** TEST REPOSITORY 6 ******************');
+ do_exec_test(@test_collation_parser_complete_rules_2,APropagateException);
+ WriteLn;
+
+ WriteLn;
+ WriteLn;
+end;
+
+procedure exec_tests(const APropagateException : Boolean);
+begin
+ exec_utils_tests(APropagateException);
+
+ exec_parser_tests(APropagateException);
+
+ WriteLn('END PARSER TESTS');
+ WriteLn('*******************************************************');
+
WriteLn('***************************** TEST 1 ******************');
- test1();
+ do_exec_test(@test1,APropagateException);
WriteLn('***************************** TEST 2 ******************');
- test2();
+ do_exec_test(@test2,APropagateException);
WriteLn('***************************** TEST 3 ******************');
- test3();
+ do_exec_test(@test3,APropagateException);
WriteLn('***************************** TEST 4 ******************');
- test4();
+ do_exec_test(@test4,APropagateException);
WriteLn('***************************** TEST 5 ******************');
- test5();
+ do_exec_test(@test5,APropagateException);
WriteLn('***************************** TEST 6 ******************');
- test6();
+ do_exec_test(@test6,APropagateException);
WriteLn('***************************** TEST 7 ******************');
- test7();
+ do_exec_test(@test7,APropagateException);
WriteLn('***************************** TEST 8 ******************');
- test8();
+ do_exec_test(@test8,APropagateException);
WriteLn('***************************** TEST 9 ******************');
- test9();
+ do_exec_test(@test9,APropagateException);
WriteLn('***************************** TEST 10 ******************');
- test10();
+ do_exec_test(@test10,APropagateException);
WriteLn('***************************** TEST 11 ******************');
- test11();
+ do_exec_test(@test11,APropagateException);
WriteLn('***************************** TEST 12 ******************');
- test12();
+ do_exec_test(@test12,APropagateException);
WriteLn('***************************** TEST 13 ******************');
- test13();
+ do_exec_test(@test13,APropagateException);
WriteLn('***************************** TEST 14 ******************');
- test14();
+ do_exec_test(@test14,APropagateException);
WriteLn('***************************** TEST 15 ******************');
- test15();
+ do_exec_test(@test15,APropagateException);
+ WriteLn('***************************** TEST 16 A ******************');
+ do_exec_test(@test16a,APropagateException);
+ WriteLn('***************************** TEST 16 B ******************');
+ do_exec_test(@test16b,APropagateException);
+ WriteLn('***************************** TEST 16 C ******************');
+ do_exec_test(@test16c,APropagateException);
+ WriteLn('***************************** TEST 16 D ******************');
+ do_exec_test(@test16d,APropagateException);
+ WriteLn('***************************** TEST 16 E ******************');
+ do_exec_test(@test16e,APropagateException);
+
+ WriteLn('**** END TESTS');
end;
function ToAnsiChars(const AValue : array of TUnicodeCodePoint) : string;
@@ -140,7 +462,10 @@ begin
Inc(p);
for i := i to ASequence.ActualLength - 1 do begin
//WriteStr(s,s,AnsiChar(p^.Characters[0]),' <',(1+Ord(p^.WeigthKind)),' ');
- WriteStr(s,s,'<',(1+Ord(p^.WeigthKind)),' ',ToAnsiChars(p^.Characters),' ');
+ if p^.IsVirtual() then
+ WriteStr(s,s,' [',GetEnumName(TypeInfo(TReorderLogicalReset),Ord(p^.VirtualPosition)),'] ')
+ else
+ WriteStr(s,s,'<',(1+Ord(p^.WeigthKind)),' ',ToAnsiChars(p^.Characters),' ');
Inc(p);
end;
end;
@@ -478,7 +803,7 @@ begin
SetLength(statement.Reset,1);
statement.Reset[0] := Ord('a');
- SetLength(statement.Elements,1);
+ statement.SetElementCount(1);
statement.Elements[0] := TReorderUnit.From(Ord('g'),TReorderWeigthKind.Primary,0);
sequence.ApplyStatement(@statement);
WriteLn('Statement #1 = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak);
@@ -491,7 +816,7 @@ begin
SetLength(statement.Reset,1);
statement.Reset[0] := Ord('a');
- SetLength(statement.Elements,2);
+ statement.SetElementCount(2);
statement.Elements[0] := TReorderUnit.From(Ord('h'),TReorderWeigthKind.Primary,0);
statement.Elements[1] := TReorderUnit.From(Ord('k'),TReorderWeigthKind.Primary,0);
sequence.ApplyStatement(@statement);
@@ -504,7 +829,7 @@ begin
SetLength(statement.Reset,1);
statement.Reset[0] := Ord('h');
- SetLength(statement.Elements,1);
+ statement.SetElementCount(1);
statement.Elements[0] := TReorderUnit.From(Ord('g'),TReorderWeigthKind.Secondary,0);
sequence.ApplyStatement(@statement);
WriteLn('Statement #3 = ',sLineBreak,' ',DumpSequenceAnsi(sequence));
@@ -3264,4 +3589,1916 @@ begin
end;
+procedure test16_prepareWeigth(var AData : TUCA_LineRecArray);
+var
+ p : PUCA_LineRec;
+begin
+ SetLength(AData,3);
+ p := @AData[Low(AData)];
+ p^.CodePoints := CodePointToArray(Ord('a'));
+ p^.Weights := ToWeight($15EF,$0020,$0002);
+ Inc(p);
+ p^.CodePoints := CodePointToArray(Ord('b'));
+ p^.Weights := ToWeight($1605,$0020,$0002);
+ Inc(p);
+ p^.CodePoints := CodePointToArray(Ord('c'));
+ p^.Weights := ToWeight($161D,$0020,$0002);
+end;
+
+procedure test16a();
+var
+ sequence : TOrderedCharacters;
+ statement : TReorderSequence;
+ wfirst, wresult : TUCA_LineRecArray;
+ i : Integer;
+ unicodeBook1, unicodeBook2 : unicodedata.TUCA_DataBook;
+begin
+ statement.Clear();
+ test16_prepareWeigth(wfirst);
+ sequence := TOrderedCharacters.Create();
+ sequence.Append(TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,1));
+ sequence.Append(TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,2));
+ sequence.Append(TReorderUnit.From(TReorderLogicalReset.LastRegular));
+ sequence.Append(TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,3));
+ for i := 0 to sequence.ActualLength - 1 do
+ sequence.Data[i].Changed := False;
+ WriteLn('Initial = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak);
+ WriteLn(DumpLines(wfirst),sLineBreak+sLineBreak);
+ ConstructUnicodeBook(wfirst,'test1','first',nil,unicodeBook1);
+ CheckInf(['a','b','c'],@unicodeBook1);
+
+ // --- test 1
+ SetLength(statement.Reset,1);
+ statement.LogicalPosition := TReorderLogicalReset.LastRegular;
+ SetLength(statement.Elements,1);
+ statement.Elements[0] := TReorderUnit.From(Ord('x'),TReorderWeigthKind.Primary,0);
+ sequence.ApplyStatement(@statement);
+ WriteLn('Statement #1 = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak);
+ wresult := nil;
+ ComputeWeigths(@sequence.Data[0],sequence.ActualLength,wfirst,wresult);
+ WriteLn(DumpLines(wresult),sLineBreak+sLineBreak);
+ ConstructUnicodeBook(wresult,'test1','1',@unicodeBook1,unicodeBook2);
+ CheckInf(['a','b','x'{*},'c'],@unicodeBook2);
+ WriteLn(' -- test 1 - ok');
+end;
+
+procedure test16b();
+var
+ sequence : TOrderedCharacters;
+ statement : TReorderSequence;
+ wfirst, wresult : TUCA_LineRecArray;
+ i : Integer;
+ unicodeBook1, unicodeBook2 : unicodedata.TUCA_DataBook;
+begin
+ statement.Clear();
+ test16_prepareWeigth(wfirst);
+ sequence := TOrderedCharacters.Create();
+ sequence.Append(TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,1));
+ sequence.Append(TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,2));
+ sequence.Append(TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,3));
+ for i := 0 to sequence.ActualLength - 1 do
+ sequence.Data[i].Changed := False;
+ WriteLn('Initial = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak);
+ WriteLn(DumpLines(wfirst),sLineBreak+sLineBreak);
+ ConstructUnicodeBook(wfirst,'test1','first',nil,unicodeBook1);
+ CheckInf(['a','b','c'],@unicodeBook1);
+
+ // --- test 1
+ SetLength(statement.Reset,1);
+ statement.LogicalPosition := TReorderLogicalReset.LastRegular;
+ SetLength(statement.Elements,1);
+ statement.Elements[0] := TReorderUnit.From(Ord('x'),TReorderWeigthKind.Primary,0);
+ sequence.ApplyStatement(@statement);
+ WriteLn('Statement #1 = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak);
+ wresult := nil;
+ ComputeWeigths(@sequence.Data[0],sequence.ActualLength,wfirst,wresult);
+ WriteLn(DumpLines(wresult),sLineBreak+sLineBreak);
+ ConstructUnicodeBook(wresult,'test1','1',@unicodeBook1,unicodeBook2);
+ CheckInf(['a','b','c','x'{*}],@unicodeBook2);
+ WriteLn(' -- test 1 - ok');
+ writeln;
+ writeln;
+
+ // test 2
+ statement.Clear();
+ SetLength(statement.Reset,1);
+ statement.Reset[0] := Ord('x');
+ SetLength(statement.Elements,1);
+ statement.Elements[0] := TReorderUnit.From(Ord('y'),TReorderWeigthKind.Primary,0);
+ sequence.ApplyStatement(@statement);
+ WriteLn('Statement #2 = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak);
+ wresult := nil;
+ ComputeWeigths(@sequence.Data[0],sequence.ActualLength,wfirst,wresult);
+ WriteLn(DumpLines(wresult),sLineBreak+sLineBreak);
+ ConstructUnicodeBook(wresult,'test2','2',@unicodeBook1,unicodeBook2);
+ CheckInf(['a','b','c','x'{*},'y'{*}],@unicodeBook2);
+ WriteLn(' -- test 2 - ok');
+end;
+
+procedure test16c();
+var
+ sequence : TOrderedCharacters;
+ statement : TReorderSequence;
+ wfirst, wresult : TUCA_LineRecArray;
+ i : Integer;
+ unicodeBook1, unicodeBook2 : unicodedata.TUCA_DataBook;
+begin
+ statement.Clear();
+ test16_prepareWeigth(wfirst);
+ sequence := TOrderedCharacters.Create();
+ sequence.Append(TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,1));
+ sequence.Append(TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,2));
+ sequence.Append(TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,3));
+ for i := 0 to sequence.ActualLength - 1 do
+ sequence.Data[i].Changed := False;
+ WriteLn('Initial = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak);
+ WriteLn(DumpLines(wfirst),sLineBreak+sLineBreak);
+ ConstructUnicodeBook(wfirst,'test1','first',nil,unicodeBook1);
+ CheckInf(['a','b','c'],@unicodeBook1);
+
+ // --- test 1
+ SetLength(statement.Reset,1);
+ statement.LogicalPosition := TReorderLogicalReset.LastRegular;
+ SetLength(statement.Elements,1);
+ statement.Elements[0] := TReorderUnit.From(Ord('x'),TReorderWeigthKind.Secondary,0);
+ sequence.ApplyStatement(@statement);
+ WriteLn('Statement #1 = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak);
+ wresult := nil;
+ ComputeWeigths(@sequence.Data[0],sequence.ActualLength,wfirst,wresult);
+ WriteLn(DumpLines(wresult),sLineBreak+sLineBreak);
+ ConstructUnicodeBook(wresult,'test1','1',@unicodeBook1,unicodeBook2);
+ CheckInf(['a','b','c','x'{*}],@unicodeBook2);
+ WriteLn(' -- test 1 - ok');
+ writeln;
+ writeln;
+
+ // test 2
+ statement.Clear();
+ SetLength(statement.Reset,1);
+ statement.Reset[0] := Ord('x');
+ SetLength(statement.Elements,1);
+ statement.Elements[0] := TReorderUnit.From(Ord('y'),TReorderWeigthKind.Primary,0);
+ sequence.ApplyStatement(@statement);
+ WriteLn('Statement #2 = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak);
+ wresult := nil;
+ ComputeWeigths(@sequence.Data[0],sequence.ActualLength,wfirst,wresult);
+ WriteLn(DumpLines(wresult),sLineBreak+sLineBreak);
+ ConstructUnicodeBook(wresult,'test2','2',@unicodeBook1,unicodeBook2);
+ CheckInf(['a','b','c','x'{*},'y'{*}],@unicodeBook2);
+ WriteLn(' -- test 2 - ok');
+end;
+
+procedure test16d();
+var
+ sequence : TOrderedCharacters;
+ statement : TReorderSequence;
+ wfirst, wresult : TUCA_LineRecArray;
+ i : Integer;
+ unicodeBook1, unicodeBook2 : unicodedata.TUCA_DataBook;
+begin
+ statement.Clear();
+ test16_prepareWeigth(wfirst);
+ sequence := TOrderedCharacters.Create();
+ sequence.Append(TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,1));
+ sequence.Append(TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,2));
+ sequence.Append(TReorderUnit.From(TReorderLogicalReset.LastRegular));
+ sequence.Append(TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,3));
+ for i := 0 to sequence.ActualLength - 1 do
+ sequence.Data[i].Changed := False;
+ WriteLn('Initial = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak);
+ WriteLn(DumpLines(wfirst),sLineBreak+sLineBreak);
+ ConstructUnicodeBook(wfirst,'test1','first',nil,unicodeBook1);
+ CheckInf(['a','b','c'],@unicodeBook1);
+
+ // --- test 1
+ SetLength(statement.Reset,1);
+ statement.LogicalPosition := TReorderLogicalReset.LastRegular;
+ statement.Before := True;
+ SetLength(statement.Elements,1);
+ statement.Elements[0] := TReorderUnit.From(Ord('x'),TReorderWeigthKind.Primary,0);
+ sequence.ApplyStatement(@statement);
+ WriteLn('Statement #1 = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak);
+ wresult := nil;
+ ComputeWeigths(@sequence.Data[0],sequence.ActualLength,wfirst,wresult);
+ WriteLn(DumpLines(wresult),sLineBreak+sLineBreak);
+ ConstructUnicodeBook(wresult,'test1','1',@unicodeBook1,unicodeBook2);
+ CheckInf(['a','x'{*},'b','c'],@unicodeBook2);
+ WriteLn(' -- test 1 - ok');
+end;
+
+procedure test16e_prepareWeigth(var AData : TUCA_LineRecArray);
+var
+ p : PUCA_LineRec;
+begin
+ SetLength(AData,5);
+ p := @AData[Low(AData)];
+ p^.CodePoints := CodePointToArray(Ord('a'));
+ p^.Weights := ToWeight(1,10,10);
+ Inc(p);
+ p^.CodePoints := CodePointToArray(Ord('b'));
+ p^.Weights := ToWeight(2,10,10);
+ Inc(p);
+ p^.CodePoints := CodePointToArray([Ord('b'),Ord('2')]);
+ p^.Weights := ToWeight(2,20,10);
+ Inc(p);
+ p^.CodePoints := CodePointToArray([Ord('b'),Ord('3')]);
+ p^.Weights := ToWeight(2,20,20);
+ Inc(p);
+ p^.CodePoints := CodePointToArray(Ord('c'));
+ p^.Weights := ToWeight(3,10,10);
+end;
+
+procedure test16e();
+var
+ sequence : TOrderedCharacters;
+ statement : TReorderSequence;
+ wfirst, wresult : TUCA_LineRecArray;
+ i : Integer;
+ unicodeBook1, unicodeBook2 : unicodedata.TUCA_DataBook;
+begin
+ statement.Clear();
+ test16e_prepareWeigth(wfirst);
+ sequence := TOrderedCharacters.Create();
+ sequence.Append(TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,1));
+ sequence.Append(TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,2));
+ sequence.Append(TReorderUnit.From([Ord('b'),Ord('2')],TReorderWeigthKind.Secondary,3));
+ sequence.Append(TReorderUnit.From([Ord('b'),Ord('3')],TReorderWeigthKind.Tertiary,4));
+ sequence.Append(TReorderUnit.From(TReorderLogicalReset.LastRegular));
+ sequence.Append(TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,5));
+ for i := 0 to sequence.ActualLength - 1 do
+ sequence.Data[i].Changed := False;
+ WriteLn('Initial = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak);
+ WriteLn(DumpLines(wfirst),sLineBreak+sLineBreak);
+ ConstructUnicodeBook(wfirst,'test1','first',nil,unicodeBook1);
+ CheckInf(['a','b','b2','b3','c'],@unicodeBook1);
+
+ // --- test 1
+ SetLength(statement.Reset,1);
+ statement.LogicalPosition := TReorderLogicalReset.LastRegular;
+ statement.Before := True;
+ SetLength(statement.Elements,1);
+ statement.Elements[0] := TReorderUnit.From(Ord('x'),TReorderWeigthKind.Secondary,0);
+ sequence.ApplyStatement(@statement);
+ WriteLn('Statement #1 = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak);
+ wresult := nil;
+ ComputeWeigths(@sequence.Data[0],sequence.ActualLength,wfirst,wresult);
+ WriteLn(DumpLines(wresult),sLineBreak+sLineBreak);
+ ConstructUnicodeBook(wresult,'test1','1',@unicodeBook1,unicodeBook2);
+ CheckInf(['a','b','x'{*},'b2','b3','c'],@unicodeBook2);
+ WriteLn(' -- test 1 - ok');
+end;
+
+procedure CheckEqual(A,B : array of TUnicodeCodePoint; const AMsg : string);overload;
+var
+ i : Integer;
+begin
+ Check((Length(A)=Length(B)),'Length() <>');
+ if (Length(A) > 0) then begin
+ for i := Low(A) to High(A) do
+ Check(A[i] = B[i],'%s, A[%d] <>',[AMsg,i]);
+ end;
+end;
+
+procedure CheckEqual(A,B : TReorderUnit; const AMsg : string);overload;
+var
+ i : Integer;
+begin
+ Check((A.VirtualPosition=B.VirtualPosition),'VirtualPosition <>');
+ Check((A.InitialPosition=B.InitialPosition),'InitialPosition <>');
+ Check((A.Changed=B.Changed),'Changed <>');
+ Check((A.WeigthKind=B.WeigthKind),'WeigthKind <>');
+ CheckEqual(A.Context,B.Context,'Context');
+ CheckEqual(A.ExpansionChars,B.ExpansionChars,'ExpansionChars');
+ CheckEqual(A.Characters,B.Characters,'Characters');
+ CheckEqual(A.Context,B.Context,'Context');
+end;
+
+procedure CheckEqual(A,B : TReorderSequence);overload;
+var
+ i : Integer;
+begin
+ Check((A.LogicalPosition=B.LogicalPosition),'LogicalPosition <>');
+ Check((A.Before=B.Before),'Before <>');
+ CheckEqual(A.Reset,B.Reset,'Reset');
+ Check((Length(A.Elements)=Length(B.Elements)),'Length(Elements) <>');
+ for i := Low(A.Elements) to High(A.Elements) do
+ CheckEqual(A.Elements[i],B.Elements[i],Format('Elements[%d]',[i]));
+end;
+
+function CountLines(const AStr : ansistring) : Integer;
+var
+ c, i : Integer;
+begin
+ c := 0;
+ for i := 1 to Length(AStr) do begin
+ if (AStr[i] = #10) then
+ c := c+1;
+ end;
+ if (c = 0) and (AStr <> '') then
+ c := c+1;
+ Result := c;
+end;
+
+procedure do_test_parser(
+ AText : ansistring;
+ const AExpected : TReorderSequence;
+ const ALineCount : Integer
+);overload;
+var
+ locText : UTF8String;
+ locTextPointer : PAnsiChar;
+ locStartPosition,
+ locMaxLen : Integer;
+ locStatement : TReorderSequence;
+ locNextPos,
+ locLineCount : Integer;
+begin
+ locText := AText;
+ WriteLn('Parsing "',locText,'" ...');
+ locTextPointer := @locText[1];
+ locMaxLen := Length(locText);
+ locStartPosition := 0;
+ locNextPos := 0;
+ locLineCount := 0;
+ locStatement.Clear();
+ Check(
+ ParseStatement(
+ locTextPointer,locStartPosition,locMaxLen,@locStatement,locNextPos,locLineCount
+ ),
+ 'Fail to Parse : "%s".', [locText]
+ );
+ if (locLineCount > 1) then
+ WriteLn;
+ WriteLn(' Next Position : ',locNextPos);
+ WriteLn(' Line Count : ',locLineCount);
+ if (CountLines(locText) = 1) then
+ Check((locNextPos>=locMaxLen),'Next Position');
+ if (ALineCount > 0) then
+ Check((locLineCount=ALineCount),'Line Count');
+ CheckEqual(locStatement,AExpected);
+
+ WriteLn(' -- test ok');
+end;
+
+procedure do_test_parser(AText : ansistring; const AExpected : TReorderSequence);inline;overload;
+begin
+ do_test_parser(AText,AExpected,1);
+end;
+
+procedure test_parser_1();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ locStatement.LogicalPosition := TReorderLogicalReset.LastTertiaryIgnorable;
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Identity,0);
+ do_test_parser('& [last tertiary ignorable] = a',locStatement);
+end;
+
+procedure test_parser_2();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ locStatement.LogicalPosition := TReorderLogicalReset.LastTertiaryIgnorable;
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,0);
+ do_test_parser('& [last tertiary ignorable] < b',locStatement);
+end;
+
+procedure test_parser_3();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ locStatement.LogicalPosition := TReorderLogicalReset.LastTertiaryIgnorable;
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Secondary,0);
+ do_test_parser('& [last tertiary ignorable] << c',locStatement);
+end;
+
+procedure test_parser_4();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ locStatement.LogicalPosition := TReorderLogicalReset.LastTertiaryIgnorable;
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('d'),TReorderWeigthKind.Tertiary,0);
+ do_test_parser('& [last tertiary ignorable] <<< d',locStatement);
+end;
+
+procedure test_parser_5();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ locStatement.LogicalPosition := TReorderLogicalReset.LastTertiaryIgnorable;
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From(1,TReorderWeigthKind.Primary,0);
+ do_test_parser('& [last tertiary ignorable] < ''\u0001''',locStatement);
+end;
+
+procedure test_parser_6();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ locStatement.LogicalPosition := TReorderLogicalReset.LastTertiaryIgnorable;
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From(7,TReorderWeigthKind.Secondary,0);
+ do_test_parser('& [last tertiary ignorable] << ''\u0007''',locStatement);
+end;
+
+procedure test_parser_7();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ locStatement.LogicalPosition := TReorderLogicalReset.LastTertiaryIgnorable;
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From(9,TReorderWeigthKind.Secondary,0);
+ do_test_parser('& [last tertiary ignorable] << ''\u0009''',locStatement);
+end;
+
+procedure test_parser_8();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ locStatement.LogicalPosition := TReorderLogicalReset.LastTertiaryIgnorable;
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From($000110BD,TReorderWeigthKind.Primary,0);
+ do_test_parser('& [last tertiary ignorable] < ''\U000110BD''',locStatement);
+end;
+
+procedure test_parser_9();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0);
+ do_test_parser('&x < a',locStatement);
+end;
+
+procedure test_parser_abreviating_1();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(3);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,0);
+ do_test_parser('&x <* abc',locStatement);
+end;
+
+procedure test_parser_abreviating_2();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(7);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[3] := TReorderUnit.From(Ord('d'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[4] := TReorderUnit.From(Ord('e'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[5] := TReorderUnit.From(Ord('f'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[6] := TReorderUnit.From(Ord('g'),TReorderWeigthKind.Primary,0);
+ do_test_parser('&x <* abcd-g',locStatement);
+end;
+
+procedure test_parser_abreviating_3();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(8);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[3] := TReorderUnit.From(Ord('d'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[4] := TReorderUnit.From(Ord('e'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[5] := TReorderUnit.From(Ord('f'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[6] := TReorderUnit.From(Ord('g'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[7] := TReorderUnit.From(Ord('p'),TReorderWeigthKind.Primary,0);
+ do_test_parser('&x <* abcd-gp',locStatement);
+end;
+
+procedure test_parser_abreviating_4();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(11);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[3] := TReorderUnit.From(Ord('d'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[4] := TReorderUnit.From(Ord('e'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[5] := TReorderUnit.From(Ord('f'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[6] := TReorderUnit.From(Ord('g'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[7] := TReorderUnit.From(Ord('p'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[8] := TReorderUnit.From(Ord('q'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[9] := TReorderUnit.From(Ord('r'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[10] := TReorderUnit.From(Ord('s'),TReorderWeigthKind.Primary,0);
+ do_test_parser('&x <* abcd-gp-s',locStatement);
+end;
+
+procedure test_parser_abreviating_5();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(3);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Secondary,0);
+ locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Secondary,0);
+ locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Secondary,0);
+ do_test_parser('&x <<* abc',locStatement);
+end;
+
+procedure test_parser_abreviating_6();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(11);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Secondary,0);
+ locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Secondary,0);
+ locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Secondary,0);
+ locStatement.Elements[3] := TReorderUnit.From(Ord('d'),TReorderWeigthKind.Secondary,0);
+ locStatement.Elements[4] := TReorderUnit.From(Ord('e'),TReorderWeigthKind.Secondary,0);
+ locStatement.Elements[5] := TReorderUnit.From(Ord('f'),TReorderWeigthKind.Secondary,0);
+ locStatement.Elements[6] := TReorderUnit.From(Ord('g'),TReorderWeigthKind.Secondary,0);
+ locStatement.Elements[7] := TReorderUnit.From(Ord('p'),TReorderWeigthKind.Secondary,0);
+ locStatement.Elements[8] := TReorderUnit.From(Ord('q'),TReorderWeigthKind.Secondary,0);
+ locStatement.Elements[9] := TReorderUnit.From(Ord('r'),TReorderWeigthKind.Secondary,0);
+ locStatement.Elements[10] := TReorderUnit.From(Ord('s'),TReorderWeigthKind.Secondary,0);
+ do_test_parser('&x <<* abcd-gp-s',locStatement);
+end;
+
+procedure test_parser_abreviating_7();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(3);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Tertiary,0);
+ locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Tertiary,0);
+ locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Tertiary,0);
+ do_test_parser('&x <<<* abc',locStatement);
+end;
+
+procedure test_parser_abreviating_8();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(11);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Tertiary,0);
+ locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Tertiary,0);
+ locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Tertiary,0);
+ locStatement.Elements[3] := TReorderUnit.From(Ord('d'),TReorderWeigthKind.Tertiary,0);
+ locStatement.Elements[4] := TReorderUnit.From(Ord('e'),TReorderWeigthKind.Tertiary,0);
+ locStatement.Elements[5] := TReorderUnit.From(Ord('f'),TReorderWeigthKind.Tertiary,0);
+ locStatement.Elements[6] := TReorderUnit.From(Ord('g'),TReorderWeigthKind.Tertiary,0);
+ locStatement.Elements[7] := TReorderUnit.From(Ord('p'),TReorderWeigthKind.Tertiary,0);
+ locStatement.Elements[8] := TReorderUnit.From(Ord('q'),TReorderWeigthKind.Tertiary,0);
+ locStatement.Elements[9] := TReorderUnit.From(Ord('r'),TReorderWeigthKind.Tertiary,0);
+ locStatement.Elements[10] := TReorderUnit.From(Ord('s'),TReorderWeigthKind.Tertiary,0);
+ do_test_parser('&x <<<* abcd-gp-s',locStatement);
+end;
+
+procedure test_parser_abreviating_9();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(3);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Identity,0);
+ locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Identity,0);
+ locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Identity,0);
+ do_test_parser('&x =* abc',locStatement);
+end;
+
+procedure test_parser_abreviating_10();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(11);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Identity,0);
+ locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Identity,0);
+ locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Identity,0);
+ locStatement.Elements[3] := TReorderUnit.From(Ord('d'),TReorderWeigthKind.Identity,0);
+ locStatement.Elements[4] := TReorderUnit.From(Ord('e'),TReorderWeigthKind.Identity,0);
+ locStatement.Elements[5] := TReorderUnit.From(Ord('f'),TReorderWeigthKind.Identity,0);
+ locStatement.Elements[6] := TReorderUnit.From(Ord('g'),TReorderWeigthKind.Identity,0);
+ locStatement.Elements[7] := TReorderUnit.From(Ord('p'),TReorderWeigthKind.Identity,0);
+ locStatement.Elements[8] := TReorderUnit.From(Ord('q'),TReorderWeigthKind.Identity,0);
+ locStatement.Elements[9] := TReorderUnit.From(Ord('r'),TReorderWeigthKind.Identity,0);
+ locStatement.Elements[10] := TReorderUnit.From(Ord('s'),TReorderWeigthKind.Identity,0);
+ do_test_parser('&x =* abcd-gp-s',locStatement);
+end;
+
+procedure test_parser_contraction_1();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('k');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From([Ord('c'),Ord('h')],TReorderWeigthKind.Primary,0);
+ do_test_parser('&k < ch',locStatement);
+end;
+
+procedure test_parser_contraction_2();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,3);
+ locStatement.Reset[0] := Ord('a');
+ locStatement.Reset[1] := Ord('b');
+ locStatement.Reset[2] := Ord('c');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From([Ord('c'),Ord('h')],TReorderWeigthKind.Primary,0);
+ do_test_parser('&abc < ch',locStatement);
+end;
+
+procedure test_parser_expansion_1();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('a');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('z'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[0].ExpansionChars := CodePointToArray(Ord('e'));
+ do_test_parser('&a < z/e',locStatement);
+end;
+
+procedure test_parser_special_char_1();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('/'),TReorderWeigthKind.Primary,0);
+ do_test_parser('&x < ''/''',locStatement);
+end;
+
+procedure test_parser_special_char_2();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('&'),TReorderWeigthKind.Primary,0);
+ do_test_parser('&x < ''&''',locStatement);
+end;
+
+procedure test_parser_special_char_3();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('<'),TReorderWeigthKind.Primary,0);
+ do_test_parser('&x < ''<''',locStatement);
+end;
+
+procedure test_parser_special_char_4();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('|'),TReorderWeigthKind.Primary,0);
+ do_test_parser('&x < ''|''',locStatement);
+end;
+
+procedure test_parser_special_char_5();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('*'),TReorderWeigthKind.Primary,0);
+ do_test_parser('&x < ''*''',locStatement);
+end;
+
+procedure test_parser_special_char_6();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('['),TReorderWeigthKind.Primary,0);
+ do_test_parser('&x < ''[''',locStatement);
+end;
+
+procedure test_parser_special_char_7();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From(Ord(']'),TReorderWeigthKind.Primary,0);
+ do_test_parser('&x < '']''',locStatement);
+end;
+
+procedure test_parser_skip_comment_1();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0);
+ do_test_parser(
+ '&x #' + sLineBreak +
+ ' < a',
+ locStatement, 2
+ );
+end;
+
+procedure test_parser_skip_comment_2();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0);
+ do_test_parser(
+ '&x # hello' + sLineBreak +
+ ' < a',
+ locStatement, 2
+ );
+end;
+
+procedure test_parser_skip_comment_3();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0);
+ do_test_parser(
+ '&x # hello' + sLineBreak +
+ sLineBreak +
+ #9#9' ' + sLineBreak +
+ ' < a',
+ locStatement, 4
+ );
+end;
+
+procedure test_parser_quoted_string_1();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] :=
+ TReorderUnit.From(
+ [Ord('<'),Ord('#'),Ord('|'),Ord('/'),Ord('!')],
+ TReorderWeigthKind.Primary,0
+ );
+ do_test_parser('&x < ''<#|/!''',locStatement);
+end;
+
+procedure test_parser_quoted_string_2();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] :=
+ TReorderUnit.From(
+ [Ord('<'),Ord('#'),Ord('|'),Ord('/'),Ord('!'),Ord('A')],
+ TReorderWeigthKind.Primary,0
+ );
+ do_test_parser('&x < ''<#|/!''A',locStatement);
+end;
+
+procedure test_parser_quoted_string_3();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] :=
+ TReorderUnit.From(
+ [Ord('<'),Ord('#'),Ord('|'),Ord('/'),Ord('!')],
+ TReorderWeigthKind.Primary,0
+ );
+ do_test_parser('&x < ''<#|/!''#',locStatement);
+end;
+
+procedure test_parser_quoted_string_4();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] :=
+ TReorderUnit.From(
+ [Ord('<'),Ord('#'),Ord('|'),Ord('/'),Ord('!'),Ord('A')],
+ TReorderWeigthKind.Primary,0
+ );
+ do_test_parser('&x < ''<#|/!''A#',locStatement);
+end;
+
+procedure test_parser_quoted_string_5();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,3);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.Reset[1] := Ord('-');
+ locStatement.Reset[2] := Ord('y');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] :=
+ TReorderUnit.From(Ord('k'),TReorderWeigthKind.Tertiary,0);
+ do_test_parser('&''x''-''y''<<<k',locStatement);
+end;
+
+procedure test_parser_quoted_string_6();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] :=
+ TReorderUnit.From(Ord('|'),TReorderWeigthKind.Primary,0);
+ do_test_parser('&x < ''|''',locStatement);
+end;
+
+procedure test_parser_quoted_string_7();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] :=
+ TReorderUnit.From([Ord('a'),Ord('|')],TReorderWeigthKind.Primary,0);
+ do_test_parser('&x < a''|''',locStatement);
+end;
+
+procedure test_parser_quoted_string_8();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] :=
+ TReorderUnit.From([Ord('a'),Ord('|'),Ord('c')],TReorderWeigthKind.Primary,0);
+ do_test_parser('&x < a''|''c',locStatement);
+end;
+
+procedure test_parser_contexte_before_1();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] :=
+ TReorderUnit.From(Ord('-'),[Ord('a')],TReorderWeigthKind.Secondary,0);
+ do_test_parser('&x << a|-',locStatement);
+end;
+
+procedure test_parser_contexte_before_2();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('a');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] :=
+ TReorderUnit.From(Ord('-'),[Ord('a')],TReorderWeigthKind.Tertiary,0);
+ do_test_parser('&a <<< a|-',locStatement);
+end;
+
+procedure test_parser_contexte_before_3();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] :=
+ TReorderUnit.From(
+ Ord('-'),[Ord('a'),Ord('z'),Ord('k')],TReorderWeigthKind.Secondary,0
+ );
+ do_test_parser('&x << azk|-',locStatement);
+end;
+
+procedure test_parser_contexte_before_4();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] :=
+ TReorderUnit.From(
+ [Ord('a'),Ord(':')],[Ord('a'),Ord('z'),Ord('k')],
+ TReorderWeigthKind.Secondary,0
+ );
+ do_test_parser('&x << azk|a:',locStatement);
+end;
+
+procedure test_parser_placement_before_1();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.Before := True;
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('k'),TReorderWeigthKind.Secondary,0);
+ do_test_parser('&[before 2] x << k',locStatement);
+end;
+
+procedure test_parser_placement_before_2();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.Before := True;
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] :=
+ TReorderUnit.From([Ord('z'),Ord('k')],TReorderWeigthKind.Tertiary,0);
+ do_test_parser('&[before 3] x <<< zk',locStatement);
+end;
+
+procedure test_parser_placement_before_3();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.Before := True;
+ locStatement.SetElementCount(1);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('z'),TReorderWeigthKind.Primary,0);
+ do_test_parser('&[before 1] x < z',locStatement);
+end;
+
+procedure test_parser_multi_unit_statement_line_1();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(3);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,0);
+ do_test_parser('&x < a < b < c',locStatement);
+ do_test_parser('&x <a <b <c',locStatement);
+ do_test_parser('&x <a<b<c',locStatement);
+end;
+
+procedure test_parser_multi_unit_statement_line_2();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(3);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0);
+ locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Secondary,0);
+ locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Tertiary,0);
+ do_test_parser('&x < a << b <<< c',locStatement);
+ do_test_parser('&x <a <<b <<<c',locStatement);
+ do_test_parser('&x <a<<b<<<c',locStatement);
+end;
+
+procedure test_parser_multi_unit_statement_line_3();
+var
+ locStatement : TReorderSequence;
+begin
+ locStatement.Clear();
+ SetLength(locStatement.Reset,1);
+ locStatement.Reset[0] := Ord('x');
+ locStatement.SetElementCount(3);
+ locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Tertiary,0);
+ locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Secondary,0);
+ locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Tertiary,0);
+ do_test_parser('&x <<< a << b <<< c',locStatement);
+ do_test_parser('&x <<<a <<b <<<c',locStatement);
+ do_test_parser('&x <<<a<<b<<<c',locStatement);
+end;
+
+procedure test_parser_multi_statement_line_1();
+const STATEMENT_BUFFER : UTF8String = '&r <<a &s <<< b';
+var
+ locStatements : array of TReorderSequence;
+ locStatement : PReorderSequence;
+ locExpectedStatement : TReorderSequence;
+ lineCount, i, bufferLength, k, nextPost : Integer;
+ buffer : PAnsiChar;
+begin
+ buffer := @STATEMENT_BUFFER[1];
+ WriteLn('Parsing "',buffer,'" ...');
+ bufferLength := Length(buffer);
+ SetLength(locStatements,10);
+ lineCount := 0;
+ nextPost := 0;
+ i := 0;
+ k := 0;
+ while (i < bufferLength) do begin
+ locStatement := @locStatements[k];
+ locStatement^.Clear();
+ if not ParseStatement(buffer,i,bufferLength,locStatement,nextPost,lineCount) then
+ Break;
+ i := nextPost;
+ k := k+1;
+ if (k > 2) then
+ raise Exception.Create('2 Statements expected, more was parsed.');
+ end;
+ Check((k=2), 'Statement Count');
+
+ locExpectedStatement.Clear();
+ SetLength(locExpectedStatement.Reset,1);
+ locExpectedStatement.Reset[0] := Ord('r');
+ locExpectedStatement.SetElementCount(1);
+ locExpectedStatement.Elements[0] :=
+ TReorderUnit.From(Ord('a'),TReorderWeigthKind.Secondary,0);
+ CheckEqual(locStatements[0],locExpectedStatement);
+
+ locExpectedStatement.Clear();
+ SetLength(locExpectedStatement.Reset,1);
+ locExpectedStatement.Reset[0] := Ord('s');
+ locExpectedStatement.SetElementCount(1);
+ locExpectedStatement.Elements[0] :=
+ TReorderUnit.From(Ord('b'),TReorderWeigthKind.Tertiary,0);
+ CheckEqual(locStatements[1],locExpectedStatement);
+
+ WriteLn(' -- test ok');
+end;
+
+procedure test_parser_multi_statement_line_2();
+const STATEMENT_BUFFER : UTF8String = '&r <a <b <<B &s <<< b <c';
+var
+ locStatements : array of TReorderSequence;
+ locStatement : PReorderSequence;
+ locExpectedStatement : TReorderSequence;
+ lineCount, i, bufferLength, k, nextPost : Integer;
+ buffer : PAnsiChar;
+begin
+ buffer := @STATEMENT_BUFFER[1];
+ WriteLn('Parsing "',buffer,'" ...');
+ bufferLength := Length(buffer);
+ SetLength(locStatements,10);
+ lineCount := 0;
+ nextPost := 0;
+ i := 0;
+ k := 0;
+ while (i < bufferLength) do begin
+ locStatement := @locStatements[k];
+ locStatement^.Clear();
+ if not ParseStatement(buffer,i,bufferLength,locStatement,nextPost,lineCount) then
+ Break;
+ i := nextPost;
+ k := k+1;
+ if (k > 2) then
+ raise Exception.Create('2 Statements expected, more was parsed.');
+ end;
+ Check((k=2), 'Statement Count');
+
+ locExpectedStatement.Clear();
+ SetLength(locExpectedStatement.Reset,1);
+ locExpectedStatement.Reset[0] := Ord('r');
+ locExpectedStatement.SetElementCount(3);
+ locExpectedStatement.Elements[0] :=
+ TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0);
+ locExpectedStatement.Elements[1] :=
+ TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,0);
+ locExpectedStatement.Elements[2] :=
+ TReorderUnit.From(Ord('B'),TReorderWeigthKind.Secondary,0);
+ CheckEqual(locStatements[0],locExpectedStatement);
+
+ locExpectedStatement.Clear();
+ SetLength(locExpectedStatement.Reset,1);
+ locExpectedStatement.Reset[0] := Ord('s');
+ locExpectedStatement.SetElementCount(2);
+ locExpectedStatement.Elements[0] :=
+ TReorderUnit.From(Ord('b'),TReorderWeigthKind.Tertiary,0);
+ locExpectedStatement.Elements[1] :=
+ TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,0);
+ CheckEqual(locStatements[1],locExpectedStatement);
+
+ WriteLn(' -- test ok');
+end;
+
+procedure test_parser_multi_statement_line_3();
+const STATEMENT_BUFFER : UTF8String = '&r <a <b <<B &s <<< b <c &x <A <W';
+var
+ locStatements : array of TReorderSequence;
+ locStatement : PReorderSequence;
+ locExpectedStatement : TReorderSequence;
+ lineCount, i, bufferLength, k, nextPost : Integer;
+ buffer : PAnsiChar;
+begin
+ buffer := @STATEMENT_BUFFER[1];
+ WriteLn('Parsing "',buffer,'" ...');
+ bufferLength := Length(buffer);
+ SetLength(locStatements,10);
+ lineCount := 0;
+ nextPost := 0;
+ i := 0;
+ k := 0;
+ while (i < bufferLength) do begin
+ locStatement := @locStatements[k];
+ locStatement^.Clear();
+ if not ParseStatement(buffer,i,bufferLength,locStatement,nextPost,lineCount) then
+ Break;
+ i := nextPost;
+ k := k+1;
+ if (k > 3) then
+ raise Exception.Create('3 Statements expected, more was parsed.');
+ end;
+ Check((k=3), 'Statement Count');
+
+ locExpectedStatement.Clear();
+ SetLength(locExpectedStatement.Reset,1);
+ locExpectedStatement.Reset[0] := Ord('r');
+ locExpectedStatement.SetElementCount(3);
+ locExpectedStatement.Elements[0] :=
+ TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0);
+ locExpectedStatement.Elements[1] :=
+ TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,0);
+ locExpectedStatement.Elements[2] :=
+ TReorderUnit.From(Ord('B'),TReorderWeigthKind.Secondary,0);
+ CheckEqual(locStatements[0],locExpectedStatement);
+
+ locExpectedStatement.Clear();
+ SetLength(locExpectedStatement.Reset,1);
+ locExpectedStatement.Reset[0] := Ord('s');
+ locExpectedStatement.SetElementCount(2);
+ locExpectedStatement.Elements[0] :=
+ TReorderUnit.From(Ord('b'),TReorderWeigthKind.Tertiary,0);
+ locExpectedStatement.Elements[1] :=
+ TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,0);
+ CheckEqual(locStatements[1],locExpectedStatement);
+
+ locExpectedStatement.Clear();
+ SetLength(locExpectedStatement.Reset,1);
+ locExpectedStatement.Reset[0] := Ord('x');
+ locExpectedStatement.SetElementCount(2);
+ locExpectedStatement.Elements[0] :=
+ TReorderUnit.From(Ord('A'),TReorderWeigthKind.Primary,0);
+ locExpectedStatement.Elements[1] :=
+ TReorderUnit.From(Ord('W'),TReorderWeigthKind.Primary,0);
+ CheckEqual(locStatements[2],locExpectedStatement);
+
+ WriteLn(' -- test ok');
+end;
+
+procedure test_parser_multi_statement_line_4();
+const STATEMENT_BUFFER : UTF8String =
+ ' &r <a <b <<B &s <<< b <c &x <A <W';
+var
+ locStatements : array of TReorderSequence;
+ locStatement : PReorderSequence;
+ locExpectedStatement : TReorderSequence;
+ lineCount, i, bufferLength, k, nextPost : Integer;
+ buffer : PAnsiChar;
+begin
+ buffer := @STATEMENT_BUFFER[1];
+ WriteLn('Parsing "',buffer,'" ...');
+ bufferLength := Length(buffer);
+ SetLength(locStatements,10);
+ lineCount := 0;
+ nextPost := 0;
+ i := 0;
+ k := 0;
+ while (i < bufferLength) do begin
+ locStatement := @locStatements[k];
+ locStatement^.Clear();
+ if not ParseStatement(buffer,i,bufferLength,locStatement,nextPost,lineCount) then
+ Break;
+ i := nextPost;
+ k := k+1;
+ if (k > 3) then
+ raise Exception.Create('3 Statements expected, more was parsed.');
+ end;
+ Check((k=3), 'Statement Count');
+
+ locExpectedStatement.Clear();
+ SetLength(locExpectedStatement.Reset,1);
+ locExpectedStatement.Reset[0] := Ord('r');
+ locExpectedStatement.SetElementCount(3);
+ locExpectedStatement.Elements[0] :=
+ TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0);
+ locExpectedStatement.Elements[1] :=
+ TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,0);
+ locExpectedStatement.Elements[2] :=
+ TReorderUnit.From(Ord('B'),TReorderWeigthKind.Secondary,0);
+ CheckEqual(locStatements[0],locExpectedStatement);
+
+ locExpectedStatement.Clear();
+ SetLength(locExpectedStatement.Reset,1);
+ locExpectedStatement.Reset[0] := Ord('s');
+ locExpectedStatement.SetElementCount(2);
+ locExpectedStatement.Elements[0] :=
+ TReorderUnit.From(Ord('b'),TReorderWeigthKind.Tertiary,0);
+ locExpectedStatement.Elements[1] :=
+ TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,0);
+ CheckEqual(locStatements[1],locExpectedStatement);
+
+ locExpectedStatement.Clear();
+ SetLength(locExpectedStatement.Reset,1);
+ locExpectedStatement.Reset[0] := Ord('x');
+ locExpectedStatement.SetElementCount(2);
+ locExpectedStatement.Elements[0] :=
+ TReorderUnit.From(Ord('A'),TReorderWeigthKind.Primary,0);
+ locExpectedStatement.Elements[1] :=
+ TReorderUnit.From(Ord('W'),TReorderWeigthKind.Primary,0);
+ CheckEqual(locStatements[2],locExpectedStatement);
+
+ WriteLn(' -- test ok');
+end;
+
+procedure test_parser_multi_line_statements_1();
+const STATEMENT_BUFFER : UTF8String =
+ '&r <a #123'#10 +
+ '&s <<< b ';
+var
+ locStatements : array of TReorderSequence;
+ locStatement : PReorderSequence;
+ locExpectedStatement : TReorderSequence;
+ lineCount, i, bufferLength, k, nextPost : Integer;
+ buffer : PAnsiChar;
+begin
+ buffer := @STATEMENT_BUFFER[1];
+ WriteLn('Parsing "',buffer,'" ...');
+ bufferLength := Length(buffer);
+ SetLength(locStatements,10);
+ lineCount := 0;
+ nextPost := 0;
+ i := 0;
+ k := 0;
+ while (i < bufferLength) do begin
+ locStatement := @locStatements[k];
+ locStatement^.Clear();
+ if not ParseStatement(buffer,i,bufferLength,locStatement,nextPost,lineCount) then
+ Break;
+ i := nextPost;
+ k := k+1;
+ if (k > 2) then
+ raise Exception.Create('2 Statements expected, more was parsed.');
+ end;
+ Check((k=2), 'Statement Count');
+
+ locExpectedStatement.Clear();
+ SetLength(locExpectedStatement.Reset,1);
+ locExpectedStatement.Reset[0] := Ord('r');
+ locExpectedStatement.SetElementCount(1);
+ locExpectedStatement.Elements[0] :=
+ TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0);
+ CheckEqual(locStatements[0],locExpectedStatement);
+
+ locExpectedStatement.Clear();
+ SetLength(locExpectedStatement.Reset,1);
+ locExpectedStatement.Reset[0] := Ord('s');
+ locExpectedStatement.SetElementCount(1);
+ locExpectedStatement.Elements[0] :=
+ TReorderUnit.From(Ord('b'),TReorderWeigthKind.Tertiary,0);
+ CheckEqual(locStatements[1],locExpectedStatement);
+
+ WriteLn(' -- test ok');
+end;
+
+//----------------------------------------------------------------------------//
+const
+ UNICODE_LINE_BREAK = #10;
+ COLLATION_XML_TEXT =
+ '<ldml>' + UNICODE_LINE_BREAK +
+ ' <identity>' + UNICODE_LINE_BREAK +
+ ' <version number="1.2.3"/>' + UNICODE_LINE_BREAK +
+ ' <generation date="$Date: 2014-07-08 21:39:31 -0500 (Tue, 08 Jul 2014) $"/>' + UNICODE_LINE_BREAK +
+ ' <language type="xy" />' + UNICODE_LINE_BREAK +
+ ' </identity>' + UNICODE_LINE_BREAK +
+ ' <collations >' + UNICODE_LINE_BREAK +
+ ' <defaultCollation>one</defaultCollation>' + UNICODE_LINE_BREAK +
+ ' <collation type="abc" >' + UNICODE_LINE_BREAK +
+ ' <import source="xy" type="private-two"/>' + UNICODE_LINE_BREAK +
+ ' <import source="xy" type="one"/>' + UNICODE_LINE_BREAK +
+ ' <suppress_contractions>[qh]</suppress_contractions>' + UNICODE_LINE_BREAK +
+ ' <cr><![CDATA[' + UNICODE_LINE_BREAK +
+ ' &w<u<v' + UNICODE_LINE_BREAK +
+ ' ]]></cr>' + UNICODE_LINE_BREAK +
+ ' </collation>' + UNICODE_LINE_BREAK +
+ ' <collation type="one" >' + UNICODE_LINE_BREAK +
+ ' <cr><![CDATA[' + UNICODE_LINE_BREAK +
+ ' &h<z<b' + UNICODE_LINE_BREAK +
+ ' ]]></cr>' + UNICODE_LINE_BREAK +
+ ' </collation>' + UNICODE_LINE_BREAK +
+ ' <collation type="private-two" >' + UNICODE_LINE_BREAK +
+ ' <cr><![CDATA[' + UNICODE_LINE_BREAK +
+ ' &f<c<<<ce' + UNICODE_LINE_BREAK +
+ ' &q<qh<<<p' + UNICODE_LINE_BREAK +
+ ' ]]></cr>' + UNICODE_LINE_BREAK +
+ ' </collation >' + UNICODE_LINE_BREAK +
+ ' <collation type="three" >' + UNICODE_LINE_BREAK +
+ ' <cr><![CDATA[' + UNICODE_LINE_BREAK +
+ ' &d<c<b<a' + UNICODE_LINE_BREAK +
+ ' ]]></cr>' + UNICODE_LINE_BREAK +
+ ' </collation>' + UNICODE_LINE_BREAK +
+ ' </collations>' + UNICODE_LINE_BREAK +
+ '</ldml>';
+ COLLATION_XML_TEXT2 =
+ '<ldml>' + UNICODE_LINE_BREAK +
+ ' <identity>' + UNICODE_LINE_BREAK +
+ ' <version number="1.2.3"/>' + UNICODE_LINE_BREAK +
+ ' <generation date="$Date: 2014-07-08 21:39:31 -0500 (Tue, 08 Jul 2014) $"/>' + UNICODE_LINE_BREAK +
+ ' <language type="kw" />' + UNICODE_LINE_BREAK +
+ ' </identity>' + UNICODE_LINE_BREAK +
+ ' <collations >' + UNICODE_LINE_BREAK +
+ ' <defaultCollation>wend</defaultCollation>' + UNICODE_LINE_BREAK +
+ ' <collation type="kis" >' + UNICODE_LINE_BREAK +
+ ' <cr><![CDATA[' + UNICODE_LINE_BREAK +
+ ' &x<c<v' + UNICODE_LINE_BREAK +
+ ' ]]></cr>' + UNICODE_LINE_BREAK +
+ ' </collation>' + UNICODE_LINE_BREAK +
+ ' <collation type="wend" >' + UNICODE_LINE_BREAK +
+ ' <import source="xy" type="one"/>' + UNICODE_LINE_BREAK +
+ ' <cr><![CDATA[' + UNICODE_LINE_BREAK +
+ ' &F<<P<<<C' + UNICODE_LINE_BREAK +
+ ' &L<a<<<Z' + UNICODE_LINE_BREAK +
+ ' ]]></cr>' + UNICODE_LINE_BREAK +
+ ' </collation>' + UNICODE_LINE_BREAK +
+ ' </collations>' + UNICODE_LINE_BREAK +
+ '</ldml>';
+
+function PrepareCollationStream(const AText : string) : TStream;
+begin
+ Result := TMemoryStream.Create();
+ if (AText <> '') then
+ Result.Write(AText[1],(Length(AText)*SizeOf(Char)));
+end;
+
+function PrepareRepositoryLoader() : ICldrCollationLoader;
+var
+ s : array of TStream;
+begin
+ SetLength(s,2);
+ s[0] := PrepareCollationStream(COLLATION_XML_TEXT);
+ s[1] := PrepareCollationStream(COLLATION_XML_TEXT2);
+ Result := TCldrCollationStreamLoader.Create(['xy','kw'],s) as ICldrCollationLoader;
+end;
+
+procedure test_collation_parser_HeaderParsing();
+var
+ rep : TCldrCollationRepository;
+ col : TCldrCollation;
+ typ : TCldrCollationItem;
+ imp : TCldrImport;
+begin
+ rep := TCldrCollationRepository.Create(PrepareRepositoryLoader());
+ try
+ Check(rep.Find('xy')=nil, 'Find() before load.');
+ Check(rep.Find('ab')=nil, 'Find() before load.');
+ col := rep.Load('xy',TCldrParserMode.HeaderParsing);
+ Check(col <> nil, 'load()');
+ Check(col.Mode=TCldrParserMode.HeaderParsing, 'Mode');
+ Check(rep.Find('xy') <> nil, 'Find() after load.');
+ Check(rep.Find('ab')=nil);
+ WriteLn(' - Step 1 ok');
+
+ Check(col.DefaultType='one', 'DefaultType');
+ Check(col.ItemCount=4, 'col.ItemCount');
+ Check(col.Find('one')<>nil, 'col.Find()');
+ Check(col.Find('private-two')<>nil, 'col.Find()');
+ Check(col.Find('three')<>nil, 'col.Find()');
+ Check(col.Find('abc')<>nil, 'col.Find()');
+
+ WriteLn(' - Step 2 ok');
+
+ typ := col.Find('private-two');
+ check(typ.IsPrivate(),'IsPrivate()');
+
+ WriteLn(' - Step 3 ok');
+
+ Check(col.Find('one').Imports.Count=0, 'one.imports=0');
+ Check(col.Find('private-two').Imports.Count=0, 'private-two.imports=0');
+ Check(col.Find('three').Imports.Count=0, 'three.imports=0');
+
+ WriteLn(' - Step 4 ok');
+
+ typ := col.Find('abc');
+ check(typ.Imports.Count=2,'abc.imports=2');
+ imp := typ.Imports[0];
+ check(imp<>nil, 'abc.Imports[0]');
+ check(
+ (imp.Source = 'xy') and (imp.TypeName = 'private-two'),
+ 'abc.Imports[0]'
+ );
+ imp := typ.Imports[1];
+ check(imp<>nil, 'abc.Imports[1]');
+ check(
+ (imp.Source = 'xy') and (imp.TypeName = 'one'),
+ 'abc.Imports[1]'
+ );
+
+ WriteLn(' - Step 5 ok');
+ finally
+ rep.Free();
+ end;
+
+ WriteLn(' -- test ok');
+end;
+
+procedure test_collation_parser_HeaderParsing_2();
+var
+ rep : TCldrCollationRepository;
+ col : TCldrCollation;
+ typ : TCldrCollationItem;
+ imp : TCldrImport;
+begin
+ rep := TCldrCollationRepository.Create(PrepareRepositoryLoader());
+ try
+ Check(rep.Find('kw')=nil, 'Find() before load.');
+ Check(rep.Find('xy')=nil, 'Find() before load.');
+ col := rep.Load('kw',TCldrParserMode.HeaderParsing);
+ Check(col <> nil, 'load()');
+ Check(col.Mode=TCldrParserMode.HeaderParsing, 'Mode');
+ Check(rep.Find('kw') <> nil, 'Find() after load.');
+ WriteLn(' - Step 1 ok');
+
+ Check(rep.Find('xy')=nil, 'Find(xy) after load.');
+ WriteLn(' - Step 2 ok');
+
+ typ := col.Find('wend');
+ check(typ.Imports.Count=1,'wend.imports=1');
+ imp := typ.Imports[0];
+ check(imp<>nil, 'wend.Imports[0]');
+ check(
+ (imp.Source = 'xy') and (imp.TypeName = 'one'),
+ 'wend.Imports[0]'
+ );
+
+ WriteLn(' - Step 3 ok');
+ finally
+ rep.Free();
+ end;
+
+ WriteLn(' -- test ok');
+end;
+
+function ParseSingleStatement(
+ const AText : UnicodeString;
+ AStatement : PReorderSequence
+) : Boolean;
+var
+ np, lc : Integer;
+ u8 : UTF8String;
+begin
+ u8 := UTF8Encode(AText);
+ np := 0;
+ lc := 0;
+ Result := ParseStatement(@u8[1],0,Length(u8),AStatement,np,lc);
+end;
+
+function ParseMultiStatements(
+ AText : UnicodeString;
+ AStatementList : PReorderSequence;
+ const AListLength : Integer
+) : Integer;
+var
+ c, nextPos, lineCount, i : Integer;
+ u8 : UTF8String;
+ buffer : PAnsiChar;
+ statement, lastStatement : PReorderSequence;
+begin
+ u8 := UTF8Encode(AText);
+ c := Length(u8);
+ buffer := @u8[1];
+ nextPos := 0;
+ i := 0;
+ lineCount := 0;
+ statement := AStatementList;
+ lastStatement := AStatementList+AListLength;
+ while (i < c) and (statement < lastStatement) do begin
+ statement^.Clear();
+ if not ParseStatement(buffer,i,c,statement,nextPos,lineCount) then
+ Break;
+ i := nextPos;
+ Inc(statement);
+ end;
+ Result := statement-AStatementList;
+end;
+
+type
+ TReorderSequenceArrayRec = record
+ Data : TReorderSequenceArray;
+ ActualLengh : Integer;
+ end;
+ PReorderSequenceArrayRec = ^TReorderSequenceArrayRec;
+
+function CopyVisitorFunc(
+ ARule : PReorderSequence;
+ AOwner : TCldrCollationItem;
+ AData : Pointer
+) : Boolean;
+var
+ p : PReorderSequenceArrayRec;
+begin
+ p := PReorderSequenceArrayRec(AData);
+ Result := (p^.ActualLengh < Length(p^.Data));
+ if Result then begin
+ p^.Data[p^.ActualLengh].Assign(ARule);
+ p^.ActualLengh := p^.ActualLengh+1;
+ end;
+end;
+
+procedure test_collation_parser_FullParsing();
+var
+ rep : TCldrCollationRepository;
+ col : TCldrCollation;
+ typ : TCldrCollationItem;
+ imp : TCldrImport;
+ locStatement : TReorderSequence;
+ locStatementList : TReorderSequenceArray;
+ c, i : Integer;
+begin
+ rep := TCldrCollationRepository.Create(PrepareRepositoryLoader());
+ try
+ Check(rep.Find('xy')=nil, 'Find() before load.');
+ Check(rep.Find('ab')=nil, 'Find() before load.');
+ col := rep.Load('xy',TCldrParserMode.FullParsing);
+ Check(col <> nil, 'load()');
+ Check(col.Mode=TCldrParserMode.FullParsing, 'Mode');
+ Check(rep.Find('xy') <> nil, 'Find() after load.');
+ Check(rep.Find('ab')=nil);
+ WriteLn(' - Step 1 ok');
+
+ Check(col.DefaultType='one', 'DefaultType');
+ Check(col.ItemCount=4, 'col.ItemCount');
+ Check(col.Find('one')<>nil, 'col.Find()');
+ Check(col.Find('private-two')<>nil, 'col.Find()');
+ Check(col.Find('three')<>nil, 'col.Find()');
+ Check(col.Find('abc')<>nil, 'col.Find()');
+
+ WriteLn(' - Step 2 ok');
+
+ typ := col.Find('private-two');
+ check(typ.IsPrivate(),'IsPrivate()');
+
+ WriteLn(' - Step 3 ok');
+
+ Check(col.Find('one').Imports.Count=0, 'one.imports=0');
+ Check(col.Find('private-two').Imports.Count=0, 'private-two.imports=0');
+ Check(col.Find('three').Imports.Count=0, 'three.imports=0');
+
+ WriteLn(' - Step 4 ok');
+
+ typ := col.Find('abc');
+ check(typ.Imports.Count=2,'abc.imports=2');
+ imp := typ.Imports[0];
+ check(imp<>nil, 'abc.Imports[0]');
+ check(
+ (imp.Source = 'xy') and (imp.TypeName = 'private-two'),
+ 'abc.Imports[0]'
+ );
+ imp := typ.Imports[1];
+ check(imp<>nil, 'abc.Imports[1]');
+ check(
+ (imp.Source = 'xy') and (imp.TypeName = 'one'),
+ 'abc.Imports[1]'
+ );
+ Check(Length(typ.Rules)=2,'Length(abc.Rules)=2');
+ Check(Length(typ.Rules[0].Elements)=2,'Length(typ.Rules[0].Elements)=2');
+ Check(typ.Rules[0].Elements[0].WeigthKind=TReorderWeigthKind.Deletion,'typ.Rules[0].Elements[0].WeigthKind=TReorderWeigthKind.Deletion');
+ Check(Length(typ.Rules[0].Elements[0].Characters)=1,'Length(typ.Rules[0].Elements[0].Characters)=1');
+ Check(typ.Rules[0].Elements[0].Characters[0]=Ord('h'),'typ.Rules[0].Elements[0].Characters[0]=h');
+ Check(typ.Rules[0].Elements[1].Characters[0]=Ord('q'),'typ.Rules[0].Elements[1].Characters[0]=q');
+ WriteLn(' - Step 5 ok');
+
+ typ := col.Find('one');
+ Check(Length(typ.Rules)>0, 'one.Rules <> nil');
+ locStatement.Clear();
+ Check(ParseSingleStatement('&h<z<b',@locStatement));
+ CheckEqual(locStatement,typ.Rules[0]);
+ WriteLn(' - Step 6 ok');
+
+ typ := col.Find('private-two');
+ Check(Length(typ.Rules)>0, 'private-two.Rules <> nil');
+ c := 2;
+ SetLength(locStatementList,5);
+ Check(
+ ParseMultiStatements(
+ '&f<c<<<ce' + UNICODE_LINE_BREAK+'&q<qh<<<p ',
+ @locStatementList[0],
+ Length(locStatementList)
+ ) = c
+ );
+ for i := 0 to c-1 do
+ CheckEqual(locStatementList[i],typ.Rules[i]);
+ WriteLn(' - Step 7 ok');
+
+ typ := col.Find('three');
+ Check(Length(typ.Rules)>0, 'three.Rules <> nil');
+ locStatement.Clear();
+ Check(ParseSingleStatement('&d<c<b<a',@locStatement));
+ CheckEqual(locStatement,typ.Rules[0]);
+ WriteLn(' - Step 8 ok');
+ finally
+ rep.Free();
+ end;
+
+ WriteLn(' -- test ok');
+end;
+
+procedure test_collation_parser_FullParsing_2();
+var
+ rep : TCldrCollationRepository;
+ col : TCldrCollation;
+ typ : TCldrCollationItem;
+ imp : TCldrImport;
+ locStatementList : TReorderSequenceArray;
+ c, i : Integer;
+begin
+ rep := TCldrCollationRepository.Create(PrepareRepositoryLoader());
+ try
+ Check(rep.Find('kw')=nil, 'Find() before load.');
+ Check(rep.Find('xy')=nil, 'Find() before load.');
+ col := rep.Load('kw',TCldrParserMode.FullParsing);
+ Check(col <> nil, 'load()');
+ Check(col.Mode=TCldrParserMode.FullParsing, 'Mode');
+ Check(rep.Find('kw') <> nil, 'Find() after load.');
+ WriteLn(' - Step 1 ok');
+
+ typ := col.Find('wend');
+ check(typ.Imports.Count=1,'wend.imports=1');
+ imp := typ.Imports[0];
+ check(imp<>nil, 'wend.Imports[0]');
+ check(
+ (imp.Source = 'xy') and (imp.TypeName = 'one'),
+ 'wend.Imports[0]'
+ );
+ Check(Length(typ.Rules)>0, 'wend.Rules <> nil');
+ c := 2;
+ SetLength(locStatementList,5);
+ Check(
+ ParseMultiStatements(
+ '&F<<P<<<C' + UNICODE_LINE_BREAK+'&L<a<<<Z ',
+ @locStatementList[0],
+ Length(locStatementList)
+ ) = c
+ );
+ for i := 0 to c-1 do
+ CheckEqual(locStatementList[i],typ.Rules[i]);
+ WriteLn(' - Step 2 ok');
+
+ finally
+ rep.Free();
+ end;
+
+ WriteLn(' -- test ok');
+end;
+
+procedure test_collation_parser_complete_rules();
+var
+ rep : TCldrCollationRepository;
+ col : TCldrCollation;
+ typ, xtyp : TCldrCollationItem;
+ c, i : Integer;
+ locData : TReorderSequenceArrayRec;
+begin
+ rep := TCldrCollationRepository.Create(PrepareRepositoryLoader());
+ try
+ col := rep.Load('xy',TCldrParserMode.FullParsing);
+ SetLength(locData.Data,23);
+
+ typ := col.Find('one');
+ locData.ActualLengh := 0;
+ Check(ForEachRule(typ,@CopyVisitorFunc,@locData), 'ForEachRule(one) - 1');
+ Check(locData.ActualLengh = 1, 'ForEachRule(one) - 2');
+ CheckEqual(locData.Data[0],typ.Rules[0]);
+ WriteLn(' - Step 1 ok');
+
+ typ := col.Find('private-two');
+ locData.ActualLengh := 0;
+ Check(ForEachRule(typ,@CopyVisitorFunc,@locData), 'ForEachRule(private-two) - 1');
+ Check(locData.ActualLengh = 2, 'ForEachRule(private-two) - 2');
+ for i := 0 to locData.ActualLengh-1 do
+ CheckEqual(locData.Data[i],typ.Rules[i]);
+ WriteLn(' - Step 2 ok');
+
+ typ := col.Find('abc');
+ locData.ActualLengh := 0;
+ SetLength(locData.Data,23);
+ Check(ForEachRule(typ,@CopyVisitorFunc,@locData), 'ForEachRule(abc) - 1');
+ Check(locData.ActualLengh = 2+2{private-two}+1{one}, 'ForEachRule(abc) - 2');
+ xtyp := col.Find('private-two');
+ c := 0;
+ for i := 0 to Length(xtyp.Rules)-1 do
+ CheckEqual(locData.Data[c+i],xtyp.Rules[i]);
+ c := c+Length(xtyp.Rules);
+ xtyp := col.Find('one');
+ for i := 0 to Length(xtyp.Rules)-1 do
+ CheckEqual(locData.Data[c+i],xtyp.Rules[i]);
+ c := c+Length(xtyp.Rules);
+ for i := 0 to Length(typ.Rules)-1 do
+ CheckEqual(locData.Data[c+i],typ.Rules[i]);
+ WriteLn(' - Step 2 ok');
+ finally
+ rep.Free();
+ end;
+
+ WriteLn(' -- test ok');
+end;
+
+procedure test_collation_parser_complete_rules_2();
+var
+ rep : TCldrCollationRepository;
+ col, xcol : TCldrCollation;
+ typ, xtyp : TCldrCollationItem;
+ locData : TReorderSequenceArrayRec;
+ c, i : Integer;
+begin
+ rep := TCldrCollationRepository.Create(PrepareRepositoryLoader());
+ try
+ col := rep.Load('kw',TCldrParserMode.FullParsing);
+ SetLength(locData.Data,23);
+
+ typ := col.Find('wend');
+ locData.ActualLengh := 0;
+ Check(ForEachRule(typ,@CopyVisitorFunc,@locData), 'ForEachRule(wend) - 1');
+ Check(locData.ActualLengh = 2+1{one}, 'ForEachRule(wend) - 2');
+ xcol := rep.Find('xy');
+ Check(xcol <> nil);
+ xtyp := xcol.Find('one');
+ Check(xtyp <> nil);
+ Check(Length(xtyp.Rules)=1);
+ c := 0;
+ for i := 0 to Length(xtyp.Rules)-1 do
+ CheckEqual(locData.Data[c+i],xtyp.Rules[i]);
+ c := c+Length(xtyp.Rules);
+ for i := 0 to Length(typ.Rules)-1 do
+ CheckEqual(locData.Data[c+i],typ.Rules[i]);
+ WriteLn(' - Step 1 ok');
+
+ finally
+ rep.Free();
+ end;
+
+ WriteLn(' -- test ok');
+end;
+
+procedure test_unicode_set_1();
+var
+ x : TUnicodeSet;
+ i : Integer;
+ s : string;
+begin
+ x := TUnicodeSet.Create();
+ try
+ for i := 0 to 256-1 do
+ Check(not x.Contains(AnsiChar(i)));
+ WriteLn(' - Stept 1 ok');
+
+ s := 'azerty';
+ x.AddPattern(Format('[%s]',[s]));
+ for i := 1 to Length(s) do
+ Check(x.Contains(s[i]));
+ WriteLn(' - Stept 2 ok');
+ finally
+ x.Free();
+ end;
+
+ WriteLn(' -- test ok');
+end;
+
+procedure test_unicode_set_2();
+var
+ x : TUnicodeSet;
+ i : Integer;
+begin
+ x := TUnicodeSet.Create();
+ try
+ x.AddPattern('[d-h]');
+ for i := Ord('d') to Ord('h') do
+ Check(x.Contains(Char(i)));
+ WriteLn(' - Stept 1 ok');
+
+ for i := Ord('a') to Ord('d')-1 do
+ Check(not x.Contains(Char(i)));
+ WriteLn(' - Stept 2 ok');
+
+ for i := Ord('h')+1 to 256-1 do
+ Check(not x.Contains(Char(i)));
+ WriteLn(' - Stept 3 ok');
+ finally
+ x.Free();
+ end;
+
+ WriteLn(' -- test ok');
+end;
+
+procedure test_unicode_set_3();
+var
+ x : TUnicodeSet;
+ s, s1 : string;
+begin
+ x := TUnicodeSet.Create();
+ try
+ s := 'azerty';
+ x.AddPattern(Format('[{%s}]',[s]));
+ Check(x.Contains(s));
+ WriteLn(' - Stept 1 ok');
+
+ Check(not x.Contains(s+'12'));
+ WriteLn(' - Stept 2 ok');
+
+ Check(not x.Contains('qs'+s));
+ WriteLn(' - Stept 3 ok');
+
+ s1 := s+'x';
+ x.AddPattern(Format('[{%s}]',[s1]));
+ Check(x.Contains(s));
+ Check(x.Contains(s1));
+ WriteLn(' - Stept 4 ok');
+ finally
+ x.Free();
+ end;
+
+ WriteLn(' -- test ok');
+end;
+
end.
diff --git a/utils/unicode/cldrtxt.pas b/utils/unicode/cldrtxt.pas
new file mode 100644
index 0000000000..44dd796adc
--- /dev/null
+++ b/utils/unicode/cldrtxt.pas
@@ -0,0 +1,687 @@
+{ Parser of the CLDR collation tailoring files.
+ This parser handle the textual syntax for CLDR version > 23
+
+ Copyright (c) 2014,2015 by Inoussa OUEDRAOGO
+
+ The source code is distributed under the Library GNU
+ General Public License with the following modification:
+
+ - object files and libraries linked into an application may be
+ distributed without source code.
+
+ If you didn't receive a copy of the file COPYING, contact:
+ Free Software Foundation
+ 675 Mass Ave
+ Cambridge, MA 02139
+ USA
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+}
+unit cldrtxt;
+
+{$mode objfpc}{$H+}
+{$TypedAddress on}
+
+interface
+
+uses
+ Classes, SysUtils,
+ cldrhelper, helper;
+
+ procedure ParseInitialDocument(ASequence : POrderedCharacters; ADoc : TCustomMemoryStream);overload;
+ procedure ParseInitialDocument(ASequence : POrderedCharacters; AFileName : string);overload;
+
+ function ParseStatement(
+ AData : PAnsiChar;
+ AStartPosition,
+ AMaxLen : Integer;
+ AStatement : PReorderSequence;
+ var ANextPos,
+ ALineCount : Integer
+ ) : Boolean;
+
+implementation
+uses
+ unicodedata;
+
+const
+ s_BEFORE = 'before';
+
+function String2UnicodeCodePointArray(const AValue : UTF8String): TUnicodeCodePointArray;
+var
+ u4str : UCS4String;
+ k : Integer;
+begin
+ if (Length(AValue) = 0) then
+ exit(nil);
+ if (Length(AValue) = 1) then begin
+ SetLength(Result,1);
+ Result[0] := Ord(AValue[1])
+ end else begin
+ u4str := UnicodeStringToUCS4String(UTF8Decode(AValue));
+ k := Length(u4str) - 1; // remove the last #0
+ SetLength(Result,k);
+ for k := 0 to k - 1 do
+ Result[k] := u4str[k];
+ end;
+end;
+
+function TryStringToReorderWeigthKind(
+ const AStr : UTF8String;
+ out AResult : TReorderWeigthKind
+) : Boolean;
+begin
+ Result := True;
+ if (AStr = '=') then
+ AResult := TReorderWeigthKind.Identity
+ else if (AStr = '<') or (AStr = '>') then
+ AResult := TReorderWeigthKind.Primary
+ else if (AStr = '<<') or (AStr = '>>') then
+ AResult := TReorderWeigthKind.Secondary
+ else if (AStr = '<<<') or (AStr = '>>>') then
+ AResult := TReorderWeigthKind.Tertiary
+ else begin
+ AResult := TReorderWeigthKind.Identity;
+ Result := False;
+ end;
+end;
+
+function ParseStatement(
+ AData : PAnsiChar;
+ AStartPosition,
+ AMaxLen : Integer;
+ AStatement : PReorderSequence;
+ var ANextPos,
+ ALineCount : Integer
+) : Boolean;
+const
+ LINE_LENGTH = 1024;
+var
+ p : PAnsiChar;
+ bufferLength, bufferPos, lineLength, linePos, lineIndex : Integer;
+ line : UTF8String;
+ statement : PReorderSequence;
+ elementActualCount : Integer;
+ specialChararter : Boolean;
+ historyItemIndex : Integer;
+ historyItems : array[0..31] of record
+ p : PAnsiChar;
+ bufferLength,
+ bufferPos,
+ lineLength,
+ linePos,
+ lineIndex : Integer;
+ line : UTF8String;
+ end;
+
+ procedure SaveState();
+ begin
+ if (historyItemIndex >= High(historyItems)) then
+ raise Exception.Create('History buffer is full.');
+ historyItemIndex := historyItemIndex+1;
+ historyItems[historyItemIndex].p := p;
+ historyItems[historyItemIndex].bufferLength := bufferLength;
+ historyItems[historyItemIndex].bufferPos := bufferPos;
+ historyItems[historyItemIndex].lineLength := lineLength;
+ historyItems[historyItemIndex].linePos := linePos;
+ historyItems[historyItemIndex].lineIndex := lineIndex;
+ historyItems[historyItemIndex].line := line;
+ end;
+
+ procedure RestoreState();
+ begin
+ if (historyItemIndex < 0) then
+ raise Exception.Create('History buffer is empty.');
+ p := historyItems[historyItemIndex].p;
+ bufferLength := historyItems[historyItemIndex].bufferLength;
+ bufferPos := historyItems[historyItemIndex].bufferPos;
+ lineLength := historyItems[historyItemIndex].lineLength;
+ linePos := historyItems[historyItemIndex].linePos;
+ lineIndex := historyItems[historyItemIndex].lineIndex;
+ line := historyItems[historyItemIndex].line;
+ historyItemIndex := historyItemIndex-1;
+ end;
+
+ procedure DiscardState();
+ begin
+ if (historyItemIndex < 0) then
+ raise Exception.Create('History buffer is empty.');
+ historyItemIndex := historyItemIndex-1;
+ end;
+
+ function CurrentLine() : UTF8String; inline;
+ begin
+ Result := Copy(line,1,lineLength);
+ end;
+
+ function NextLine() : Boolean;
+ var
+ locOldPos : Integer;
+ locOldPointer : PAnsiChar;
+ begin
+ Result := False;
+ if (p^ = #10) then begin
+ Inc(p);
+ Inc(bufferPos);
+ end;
+ locOldPos := bufferPos;
+ locOldPointer := p;
+ while (bufferPos < bufferLength) and (p^ <> #10) do begin
+ Inc(p);
+ Inc(bufferPos);
+ end;
+ if (locOldPos = bufferPos) and (p^ = #10) then begin
+ lineLength := 0;
+ Inc(p);
+ Inc(bufferPos);
+ linePos := 1;
+ Result := True;
+ end else if (locOldPos < bufferPos) then begin
+ lineLength := (bufferPos - locOldPos);
+ if (lineLength >= Length(line)) then
+ SetLength(line,(2*lineLength));
+ Move(locOldPointer^,line[1],lineLength);
+ {if (p^ = #10) then begin
+ //Dec(lineLength);
+ Inc(p);
+ Inc(bufferPos);
+ end;}
+ linePos := 1;
+ Result := True;
+ end;
+ if Result and (locOldPos < bufferPos) then
+ lineIndex := lineIndex+1;
+ end;
+
+ procedure CheckLineLength(const ALength : Integer);
+ begin
+ if (ALength > lineLength) then
+ raise Exception.CreateFmt('Unexpected end of line : "%s".',[CurrentLine()]);
+ end;
+
+ function ReadChar(out AResult : UTF8String) : Boolean;
+ var
+ k : Integer;
+ us : UnicodeString;
+ begin
+ AResult := '';
+ Result := False;
+ if (linePos > lineLength) then
+ exit;
+ {if CharInSet(line[linePos],['#','=','&','[',']']) then begin
+ AResult := line[linePos];
+ Inc(linePos);
+ exit(True);
+ end;}
+ if (line[linePos] <> '\') then begin
+ AResult := line[linePos];
+ Inc(linePos);
+ exit(True);
+ end;
+ CheckLineLength(linePos+1);
+ Inc(linePos);
+ case line[linePos] of
+ '''': begin
+ AResult := '\';
+ exit(True);
+ end;
+ {'\' : begin
+ AResult := '\';
+ exit(True);
+ end;}
+ 'u' : begin
+ CheckLineLength(linePos+4);
+ AResult := '$'+Copy(line,(linePos+1),4);
+ if not TryStrToInt(AResult,k) then
+ raise Exception.CreateFmt('Hexadecimal Integer expected but found "%s", line = "%s".',[AResult,CurrentLine()]);
+ SetLength(us,1);
+ us[1] := UnicodeChar(k);
+ AResult := UTF8Encode(us);
+ Inc(linePos,5);
+ exit(True);
+ end;
+ 'U' : begin
+ CheckLineLength(linePos+8);
+ AResult := '$'+Copy(line,(linePos+1),8);
+ if not TryStrToInt(AResult,k) then
+ raise Exception.CreateFmt('Hexadecimal Integer expected but found "%s".',[AResult]);
+ if (k > High(Word)) then begin
+ SetLength(us,2);
+ FromUCS4(k,us[1],us[2]);
+ if (Ord(us[2]) = 0) then
+ SetLength(us,1);
+ end else begin
+ SetLength(us,1);
+ us[1] := UnicodeChar(k);
+ end;
+ AResult := UTF8Encode(us);
+ Inc(linePos,9);
+ exit(True);
+ end;
+ else
+ raise Exception.CreateFmt('Invalide escaped string "%s", at %d position.',[CurrentLine(),linePos]);
+ end;
+ end;
+
+ function ReadQuotedString() : UTF8String;
+ var
+ ks : UTF8String;
+ begin
+ if (line[linePos] <> '''') then
+ raise Exception.CreateFmt('Unexpected character found "%s", a quote expected: "%s".',[line[linePos],CurrentLine()]);
+ Inc(linePos);
+ if (linePos > lineLength) then
+ raise Exception.CreateFmt('Unexpected end of line, a quote expected: "%s".',[CurrentLine()]);
+ if (line[linePos] = '''') then begin
+ Inc(linePos);
+ Result := '''';
+ exit;
+ end;
+ Result := '';
+ while (linePos <= lineLength) and ReadChar(ks) do begin
+ Result := Result + ks;
+ if (line[linePos] = '''') then
+ break;
+ end;
+ if (line[linePos] = '''') then begin
+ Inc(linePos);
+ exit;
+ end;
+ raise Exception.CreateFmt('Unexpected end of line, a quote expected: "%s".',[line]);
+ end;
+
+ function ReadUnQuotedString() : UTF8String;
+ var
+ k : Integer;
+ begin
+ k := linePos;
+ while (linePos <= lineLength) and
+ not(CharInSet(line[linePos],[' ',#9,'#', '=','&','[',']','<','>','''','/','|']))
+ do begin
+ Inc(linePos);
+ end;
+ if (linePos > k) then begin
+ if (line[linePos] in [' ',#9,'#', '=','&','[',']','<','>','''','/','|']) then
+ Result := Copy(line,k,(linePos-k))
+ else
+ Result := Copy(line,k,(linePos-k)); //Result := Copy(line,k,(linePos-k+1));
+ end else begin
+ Result := '';
+ end;
+ end;
+
+ function NextToken() : UTF8String; overload;
+ var
+ k : Integer;
+ ks : UTF8String;
+ begin
+ specialChararter := False;
+ while True do begin
+ while (linePos <= lineLength) and CharInSet(line[linePos],[' ', #9, #13]) do begin
+ Inc(linePos);
+ end;
+ if (linePos > lineLength) or (line[linePos] = '#') then begin
+ if not NextLine() then
+ exit('');
+ Continue;
+ end ;
+ Break;
+ end;
+ if (linePos > lineLength) then
+ exit('');
+
+ if (line[linePos] = '*') then begin
+ linePos := linePos+1;
+ specialChararter := True;
+ exit('*');
+ end;
+ k := linePos;
+ if (linePos <= lineLength) and CharInSet(line[linePos],['<','>']) then begin
+ ks := line[linePos];
+ while (linePos <= lineLength) and (line[linePos] = ks) do begin
+ Inc(linePos);
+ end;
+ Result := Copy(line,k,(linePos-k));
+ exit;
+ end;
+ if (linePos <= lineLength) and
+ CharInSet(line[linePos],['=','&','[',']','<','>','/','|'])
+ then begin
+ Inc(linePos);
+ Result := Copy(line,k,(linePos-k));
+ specialChararter := True;
+ exit;
+ end;
+ {if (line[linePos] = '''') then
+ exit(ReadQuotedString()); }
+ Result := '';
+ while (linePos <= lineLength) do begin
+ if CharInSet(line[linePos],[' ',#9,#13,'#', '=','&','[',']','<','>','/','|']) then
+ Break;
+ if (line[linePos] <> '''') then
+ ks := ReadUnQuotedString()
+ else
+ ks := ReadQuotedString();
+ if (ks = '') then
+ Break;
+ Result := Result + ks;
+ end;
+ end;
+
+ function NextToken(const AMustSucceed : Boolean) : UTF8String; overload;
+ begin
+ Result := NextToken();
+ if (Result = '') and AMustSucceed then
+ raise Exception.CreateFmt('Unexpected end of line(%d) : "%s".',[lineIndex,CurrentLine()]);
+ end;
+
+ procedure CheckToken(const AActual, AExpectedToken : UTF8String);
+ begin
+ if (AActual <> AExpectedToken) then
+ raise Exception.CreateFmt(
+ '"%s" expected but "%s" found at position %d, BufferPosition(%d), line(%d) = "%s".',
+ [AExpectedToken,AActual,linePos,bufferPos,lineIndex,CurrentLine()]
+ );
+ end;
+
+ function parse_reset() : Boolean;
+ var
+ s, s1 : UTF8String;
+ logicalPos : TReorderLogicalReset;
+ k : Integer;
+ begin
+ s := NextToken();
+ if (s = '') then
+ exit(False);
+ CheckToken(s,'&');
+ s := NextToken(True);
+ if (s = '[') then begin
+ s := NextToken();
+ if (s = s_BEFORE) then begin
+ s := NextToken();
+ if not(TryStrToInt(s,k)) or (k < 1) or (k > 3) then
+ CheckToken(s,'"1" or "2" or "3"');
+ CheckToken(NextToken(True),']');
+ statement^.Reset := String2UnicodeCodePointArray(NextToken(True));
+ statement^.Before := True;
+ end else begin
+ while True do begin
+ s1 := NextToken();
+ if (s1 = '') or (s1 = ']') then
+ break;
+ s := s + Trim(s1)
+ end;
+ CheckToken(s1,']');
+ if (s = '') then
+ raise Exception.CreateFmt('Unexpected end of line : "%s".',[CurrentLine()]);
+ if not TryStrToLogicalReorder(s,logicalPos) then
+ raise Exception.CreateFmt(sUnknownResetLogicalPosition,[s]);
+ statement^.LogicalPosition := logicalPos;
+ end;
+ end else begin
+ statement^.Reset := String2UnicodeCodePointArray(s);
+ end;
+ if (statement^.LogicalPosition = TReorderLogicalReset.None) and
+ (Length(statement^.Reset) = 0)
+ then
+ raise Exception.Create(sInvalidResetClause);
+ Result := True;
+ end;
+
+ procedure EnsureElementLength(const ALength : Integer);
+ var
+ k, d : Integer;
+ begin
+ k := Length(statement^.Elements);
+ if (k < ALength) then begin
+ k := ALength;
+ if (k = 0) then begin
+ k := 50;
+ end else begin
+ if (k < 10) then
+ d := 10
+ else
+ d := 2;
+ k := k * d;
+ end;
+ statement^.SetElementCount(k);
+ end;
+ end;
+
+ procedure AddElement(
+ const AChars : array of UCS4Char;
+ const AWeigthKind : TReorderWeigthKind;
+ const AContext : UTF8String
+ );overload;
+ var
+ kp : PReorderUnit;
+ kc, k : Integer;
+ begin
+ EnsureElementLength(elementActualCount+1);
+ kp := @statement^.Elements[elementActualCount];
+ kc := Length(AChars)-1;
+ if (kc < 0) then
+ kc := 0;
+ SetLength(kp^.Characters,kc);
+ for k := 0 to kc - 1 do
+ kp^.Characters[k] := AChars[k];
+ kp^.WeigthKind := AWeigthKind;
+ elementActualCount := elementActualCount + 1;
+ if (AContext <> '') then
+ kp^.Context := String2UnicodeCodePointArray(AContext);
+ end;
+
+ procedure AddElement(
+ const AChar : UCS4Char;
+ const AWeigthKind : TReorderWeigthKind;
+ const AContext : UTF8String
+ );overload;
+ var
+ kp : PReorderUnit;
+ kc, k : Integer;
+ begin
+ EnsureElementLength(elementActualCount+1);
+ kp := @statement^.Elements[elementActualCount];
+ SetLength(kp^.Characters,1);
+ kp^.Characters[0] := AChar;
+ kp^.WeigthKind := AWeigthKind;
+ elementActualCount := elementActualCount + 1;
+ if (AContext <> '') then
+ kp^.Context := String2UnicodeCodePointArray(AContext);
+ end;
+
+ function ReadNextItem() : Boolean;
+ var
+ contextStr : UTF8String;
+ w : TReorderWeigthKind;
+ last : PReorderUnit;
+ u4str : UCS4String;
+ s, ts : UTF8String;
+ expandStr : TUnicodeCodePointArray;
+ k, kc, x : Integer;
+ us : UnicodeString;
+ begin
+ contextStr := '';
+ expandStr := nil;
+ Result := False;
+ SaveState();
+ s := NextToken();
+ if (s = '') then begin
+ DiscardState();
+ exit;
+ end;
+ if specialChararter and (s = '&') then begin
+ RestoreState();
+ exit;
+ end;
+ DiscardState();
+ if not TryStringToReorderWeigthKind(s,w) then
+ CheckToken(s,'Reorder Weigth');
+ s := NextToken(True);
+ if specialChararter then begin
+ if (s = '[') then begin
+ k := 1;
+ while True do begin
+ ts := NextToken(True);
+ s := s + ts;
+ if specialChararter then begin
+ if (ts = '[') then
+ k := k+1
+ else if (ts = ']') then begin
+ k := k-1;
+ if (k = 0) then
+ Break;
+ end;
+ end;
+ end;
+ if (Pos('variable',s) > 0) then
+ exit(True);
+ end else if (s = '*') then begin
+ s := NextToken(True);
+ us := UTF8Decode(s);
+ u4str := UnicodeStringToUCS4String(us);
+ kc := Length(u4str)-1;
+ k := 0;
+ while (k <= (kc-1)) do begin
+ if (k > 0) and (u4str[k] = Ord('-')) then begin
+ if (k = (kc-1)) then begin
+ AddElement(u4str[k],w,contextStr);
+ end else begin
+ for x := (u4str[k-1]+1) to u4str[k+1] do
+ AddElement(x,w,contextStr);
+ k := k+1;
+ end;
+ end else begin
+ AddElement(u4str[k],w,contextStr);
+ end;
+ k := k+1;
+ end;
+ exit(True);
+ end;
+ end;
+ SaveState();
+ ts := NextToken();
+ if (ts = '') or not(specialChararter) then begin
+ RestoreState();
+ us := UTF8Decode(s);
+ u4str := UnicodeStringToUCS4String(us);
+ end else begin
+ if (ts = '|') then begin
+ DiscardState();
+ contextStr := s;
+ s := NextToken(True);
+ SaveState();
+ ts := NextToken();
+ end;
+ if specialChararter and (ts = '/') then begin
+ expandStr := String2UnicodeCodePointArray(NextToken(True));
+ DiscardState();
+ end else begin
+ RestoreState();
+ end;
+ u4str := UnicodeStringToUCS4String(UTF8Decode(s));
+ end;
+ AddElement(u4str,w,contextStr);
+ if (Length(expandStr) > 0) then begin
+ last := @statement^.Elements[elementActualCount-1];
+ last^.ExpansionChars := expandStr;
+ end;
+ Result := True;
+ end;
+
+begin
+ Result := False;
+ elementActualCount := 0;
+ if (AStartPosition >= AMaxLen) then
+ exit;
+ historyItemIndex := -1;
+ lineIndex := ALineCount;
+ bufferLength := AMaxLen;
+ bufferPos := AStartPosition;
+ p := AData+AStartPosition;
+ SetLength(line,LINE_LENGTH);
+ statement := AStatement;
+ statement^.Clear();
+ if not NextLine() then
+ exit;
+ if not parse_reset() then
+ exit;
+ while ReadNextItem() do begin
+ // All done in the condition
+ end;
+ statement^.SetElementCount(elementActualCount);
+ if (linePos > lineLength) then
+ linePos := lineLength;
+ ANextPos := bufferPos-lineLength+linePos;
+ Result := (ANextPos > AStartPosition);
+ ALineCount := lineIndex;
+end;
+
+procedure ParseInitialDocument(ASequence : POrderedCharacters; ADoc : TCustomMemoryStream);
+var
+ buffer : PAnsiChar;
+ bufferLength : Integer;
+ i, nextPost : Integer;
+ statement : TReorderSequence;
+ p : PReorderUnit;
+ lineCount : Integer;
+begin
+ if (ADoc.Size < 1) then
+ exit;
+ buffer := ADoc.Memory; //0xEF,0xBB,0xBF
+ bufferLength := ADoc.Size;
+ if (bufferLength >= 3) and
+ (Byte(buffer[0]) = $EF) and
+ (Byte(buffer[1]) = $BB) and
+ (Byte(buffer[2]) = $BF)
+ then begin
+ Inc(buffer,3);
+ Dec(bufferLength,3);
+ end;
+ lineCount := 0;
+ ASequence^.Clear();
+ SetLength(ASequence^.Data,50000);
+ nextPost := 0;
+ i := 0;
+ while (i < bufferLength) do begin
+ statement.Clear();
+ if not ParseStatement(buffer,i,bufferLength,@statement,nextPost,lineCount) then
+ Break;
+ i := nextPost;
+ try
+ ASequence^.ApplyStatement(@statement);
+ except
+ on e : Exception do begin
+ e.Message := Format('%s Position = %d',[e.Message,i]);
+ raise;
+ end;
+ end;
+ end;
+ if (ASequence^.ActualLength > 0) then begin
+ p := @ASequence^.Data[0];
+ for i := 0 to ASequence^.ActualLength - 1 do begin
+ p^.Changed := False;
+ Inc(p);
+ end;
+ end;
+end;
+
+procedure ParseInitialDocument(ASequence : POrderedCharacters; AFileName : string);
+var
+ doc : TMemoryStream;
+begin
+ doc := TMemoryStream.Create();
+ try
+ doc.LoadFromFile(AFileName);
+ doc.Position := 0;
+ ParseInitialDocument(ASequence,doc);
+ finally
+ doc.Free();
+ end;
+end;
+
+
+end.
+
diff --git a/utils/unicode/cldrxml.pas b/utils/unicode/cldrxml.pas
index 49d57d7f08..f14c7f4ed1 100644
--- a/utils/unicode/cldrxml.pas
+++ b/utils/unicode/cldrxml.pas
@@ -1,6 +1,6 @@
{ Parser of the CLDR collation xml files.
- Copyright (c) 2013 by Inoussa OUEDRAOGO
+ Copyright (c) 2013, 2014, 2015 by Inoussa OUEDRAOGO
The source code is distributed under the Library GNU
General Public License with the following modification:
@@ -19,6 +19,12 @@
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
}
+{ The procedure whoses names lasted by 'XML' (ParseInitialDocumentXML,
+ ParseCollationDocumentXML, ...) are for older CLDR versions (CDLR <= 23); The
+ old version was unsing a XML syntax for collation's rules specifications.
+ The new versions (and going forward) will be using the text syntax.
+}
+
unit cldrxml;
{$mode objfpc}{$H+}
@@ -29,49 +35,123 @@ uses
Classes, SysUtils, DOM,
cldrhelper;
- procedure ParseInitialDocument(ASequence : POrderedCharacters; ADoc : TDOMDocument);overload;
- procedure ParseInitialDocument(ASequence : POrderedCharacters; AFileName : string);overload;
+type
+
+ { TCldrCollationFileLoader }
+
+ TCldrCollationFileLoader = class(TInterfacedObject,ICldrCollationLoader)
+ private
+ FPath : string;
+ private
+ procedure SetPath(APath : string);
+ function BuildFileName(ALanguage : string) : string;
+ procedure CheckFile(AFileName : string);
+ protected
+ procedure LoadCollation(
+ const ALanguage : string;
+ ACollation : TCldrCollation;
+ AMode : TCldrParserMode
+ );
+ procedure LoadCollationType(
+ const ALanguage,
+ ATypeName : string;
+ AType : TCldrCollationItem
+ );
+ public
+ constructor Create(APath : string);
+ end;
+
+ { TCldrCollationStreamLoader }
+
+ TCldrCollationStreamLoader = class(TInterfacedObject,ICldrCollationLoader)
+ private
+ FLanguages : array of string;
+ FStreams : array of TStream;
+ private
+ procedure CheckContent(ALanguage : string);
+ function IndexOf(ALanguage : string) : Integer;
+ protected
+ procedure LoadCollation(
+ const ALanguage : string;
+ ACollation : TCldrCollation;
+ AMode : TCldrParserMode
+ );
+ procedure LoadCollationType(
+ const ALanguage,
+ ATypeName : string;
+ AType : TCldrCollationItem
+ );
+ public
+ constructor Create(
+ const ALanguages : array of string;
+ const AStreams : array of TStream
+ );
+ destructor Destroy();override;
+ end;
+
+ procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; ADoc : TDOMDocument);overload;
+ procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; AFileName : string);overload;
- procedure ParseCollationDocument(
+ procedure ParseCollationDocumentXML(
ADoc : TDOMDocument;
ACollation : TCldrCollation;
AMode : TCldrParserMode
);overload;
- procedure ParseCollationDocument(
+ procedure ParseCollationDocumentXML(
+ ADoc : TDOMDocument;
+ ACollation : TCldrCollationItem;
+ AType : string
+ );overload;
+ procedure ParseCollationDocumentXML(
const AFileName : string;
ACollation : TCldrCollation;
AMode : TCldrParserMode
);overload;
+ procedure ParseCollationDocumentXML(
+ const AFileName : string;
+ ACollation : TCldrCollationItem;
+ AType : string
+ );overload;
- procedure ParseCollationDocument(
+ //-----------------------------------------------------
+ procedure ParseCollationDocument2(
+ ADoc : TDOMDocument;
+ ACollation : TCldrCollation;
+ AMode : TCldrParserMode
+ );overload;
+ procedure ParseCollationDocument2(
+ const AFileName : string;
+ ACollation : TCldrCollation;
+ AMode : TCldrParserMode
+ );overload;
+ procedure ParseCollationDocument2(
+ AStream : TStream;
+ ACollation : TCldrCollation;
+ AMode : TCldrParserMode
+ );overload;
+
+ procedure ParseCollationDocument2(
const AFileName : string;
ACollation : TCldrCollationItem;
AType : string
);overload;
- procedure ParseCollationDocument(
+ procedure ParseCollationDocument2(
ADoc : TDOMDocument;
ACollation : TCldrCollationItem;
AType : string
);overload;
-
-resourcestring
- sCaseNothandled = 'This case is not handled : "%s", Position = %d.';
- sCodePointExpected = 'Code Point node expected as child at this position "%d".';
- sCollationsNodeNotFound = '"collations" node not found.';
- sCollationTypeNotFound = 'collation "Type" not found : "%s".';
- sHexAttributeExpected = '"hex" attribute expected at this position "%d".';
- sInvalidResetClause = 'Invalid "Reset" clause.';
- sNodeNameAssertMessage = 'Expected NodeName "%s", got "%s".';
- sRulesNodeNotFound = '"rules" node not found.';
- sTextNodeChildExpected = '(Child) text node expected at this position "%d", but got "%s".';
- sUniqueChildNodeExpected = 'Unique child node expected at this position "%d".';
- sUnknownResetLogicalPosition = 'Unknown reset logical position : "%s".';
+ procedure ParseCollationDocument2(
+ AStream : TStream;
+ ACollation : TCldrCollationItem;
+ AType : string
+ );overload;
implementation
uses
- typinfo, XMLRead, XPath, Helper, unicodeset;
+ typinfo, RtlConsts, XMLRead, XPath, Helper, unicodeset, cldrtxt;
const
+ s_ALT = 'alt';
s_AT = 'at';
//s_BEFORE = 'before';
s_CODEPOINT = 'codepoint';
@@ -81,12 +161,16 @@ const
//s_DEFAULT = 'default';
s_EXTEND = 'extend';
s_HEX = 'hex';
+ s_IMPORT = 'import';
s_POSITION = 'position';
s_RESET = 'reset';
s_RULES = 'rules';
+ s_SOURCE = 'source';
//s_STANDART = 'standard';
s_TYPE = 'type';
+ s_CR = 'cr';
+
procedure CheckNodeName(ANode : TDOMNode; const AExpectedName : DOMString);
begin
if (ANode.NodeName <> AExpectedName) then
@@ -124,23 +208,7 @@ begin
end;
end;
-function TryStrToLogicalReorder(
- const AValue : string;
- out AResult : TReorderLogicalReset
-) : Boolean;
-var
- s : string;
- i : Integer;
-begin
- s := StringReplace(AValue,' ','',[rfReplaceAll]);
- s := StringReplace(s,'_','',[rfReplaceAll]);
- i := GetEnumValue(TypeInfo(TReorderLogicalReset),s);
- Result := (i > -1);
- if Result then
- AResult := TReorderLogicalReset(i);
-end;
-
-function ParseStatement(
+function ParseStatementXML(
ARules : TDOMElement;
AStartPosition : Integer;
AStatement : PReorderSequence;
@@ -393,7 +461,7 @@ begin
ANextPos := i;
end;
-procedure ParseInitialDocument(ASequence : POrderedCharacters; ADoc : TDOMDocument);
+procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; ADoc : TDOMDocument);
var
n : TDOMNode;
rulesElement : TDOMElement;
@@ -412,7 +480,7 @@ begin
i := 0;
while (i < c) do begin
statement.Clear();
- if not ParseStatement(rulesElement,i,@statement,nextPost) then
+ if not ParseStatementXML(rulesElement,i,@statement,nextPost) then
Break;
i := nextPost;
try
@@ -433,13 +501,13 @@ begin
end;
end;
-procedure ParseInitialDocument(ASequence : POrderedCharacters; AFileName : string);
+procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; AFileName : string);
var
doc : TXMLDocument;
begin
ReadXMLFile(doc,AFileName);
try
- ParseInitialDocument(ASequence,doc);
+ ParseInitialDocumentXML(ASequence,doc);
finally
doc.Free();
end;
@@ -500,10 +568,10 @@ begin
it.Free();
uset.Free();
end;
- SetLength(r,0);
+ r := nil;
end;
-procedure ParseCollationItem(
+procedure ParseCollationItemXML(
ACollationNode : TDOMElement;
AItem : TCldrCollationItem;
AMode : TCldrParserMode
@@ -544,7 +612,105 @@ begin
i := 0;
while (i < c) do begin
statement^.Clear();
- if not ParseStatement(rulesElement,i,statement,nextPos) then
+ if not ParseStatementXML(rulesElement,i,statement,nextPos) then
+ Break;
+ i := nextPos;
+ Inc(statement);
+ Inc(sal);
+ if (sal >= Length(statementList)) then begin
+ SetLength(statementList,(sal*2));
+ statement := @statementList[(sal-1)];
+ end;
+ end;
+ end;
+ SetLength(statementList,sal);
+ AItem.Rules := statementList;
+ end;
+end;
+
+procedure ParseImports(ACollationNode : TDOMElement; AItem : TCldrCollationItem);
+var
+ locList : TXPathVariable;
+ i : Integer;
+ nd, locAtt : TDOMNode;
+ locSource, locType : string;
+begin
+ locList := EvaluateXPathExpression(s_IMPORT,ACollationNode);
+ try
+ if not locList.InheritsFrom(TXPathNodeSetVariable) then
+ exit;
+ for i := 0 to locList.AsNodeSet.Count-1 do begin
+ nd := TDOMNode(locList.AsNodeSet[i]);
+ if (nd.Attributes <> nil) then begin
+ locSource := '';
+ locType := '';
+ locAtt := nd.Attributes.GetNamedItem(s_SOURCE);
+ if (locAtt <> nil) then
+ locSource := locAtt.NodeValue;
+ locAtt := nd.Attributes.GetNamedItem(s_TYPE);
+ if (locAtt <> nil) then
+ locType := locAtt.NodeValue;
+ end;
+ if (locType <> '') then
+ AItem.Imports.Add(locSource,locType);
+ end;
+ finally
+ locList.Free();
+ end;
+end;
+
+procedure ParseCollationItem2(
+ ACollationNode : TDOMElement;
+ AItem : TCldrCollationItem;
+ AMode : TCldrParserMode
+);
+var
+ n : TDOMNode;
+ rulesElement : TDOMCDATASection;
+ i, c, nextPos : Integer;
+ statementList : TReorderSequenceArray;
+ sal : Integer;//statement actual length
+ statement : PReorderSequence;
+ s : DOMString;
+ u8 : UTF8String;
+ buffer : PAnsiChar;
+ lineCount : Integer;
+begin
+ AItem.TypeName := ACollationNode.GetAttribute(s_TYPE);
+ AItem.Alt := ACollationNode.GetAttribute(s_ALT);
+ AItem.Base := EvaluateXPathStr('base',ACollationNode);
+ AItem.Backwards := (EvaluateXPathStr('settings/@backwards',ACollationNode) = 'on');
+ if AItem.Backwards then
+ AItem.ChangedFields := AItem.ChangedFields + [TCollationField.BackWard];
+ ParseImports(ACollationNode,AItem);
+ AItem.Rules := nil;
+ if (AMode = TCldrParserMode.FullParsing) then begin
+ SetLength(statementList,15);
+ sal := 0;
+ statement := @statementList[0];
+ s := EvaluateXPathStr('suppress_contractions',ACollationNode);
+ if (s <> '') then begin
+ if (ParseDeletion(s,statement) > 0) then begin
+ Inc(sal);
+ Inc(statement);
+ end else begin
+ statement^.Clear();
+ end;
+ end;
+ n := ACollationNode.FindNode(s_CR);
+ if (n <> nil) then begin
+ n := (n as TDOMElement).FirstChild;
+ rulesElement := n as TDOMCDATASection;
+ s := rulesElement.Data;
+ u8 := UTF8Encode(s);
+ c := Length(u8);
+ buffer := @u8[1];
+ nextPos := 0;
+ i := 0;
+ lineCount := 0;
+ while (i < c) do begin
+ statement^.Clear();
+ if not ParseStatement(buffer,i,c,statement,nextPos,lineCount) then
Break;
i := nextPos;
Inc(statement);
@@ -560,7 +726,7 @@ begin
end;
end;
-procedure ParseCollationDocument(
+procedure ParseCollationDocumentXML(
ADoc : TDOMDocument;
ACollation : TCldrCollation;
AMode : TCldrParserMode
@@ -577,6 +743,7 @@ begin
raise Exception.Create(sCollationsNodeNotFound);
collationsElement := n as TDOMElement;
ACollation.Clear();
+ ACollation.Mode := AMode;
ACollation.Language := EvaluateXPathStr('identity/language/@type',ADoc.DocumentElement);
ACollation.Version := EvaluateXPathStr('identity/version/@number',ADoc.DocumentElement);
ACollation.DefaultType := EvaluateXPathStr('collations/default/@type',ADoc.DocumentElement);
@@ -589,7 +756,7 @@ begin
n := nl[i];
if (n.NodeName = s_COLLATION) then begin
item := TCldrCollationItem.Create();
- ParseCollationItem((n as TDOMElement),item,AMode);
+ ParseCollationItemXML((n as TDOMElement),item,AMode);
ACollation.Add(item);
item := nil;
end
@@ -601,7 +768,7 @@ begin
end;
end;
-procedure ParseCollationDocument(
+procedure ParseCollationDocumentXML(
ADoc : TDOMDocument;
ACollation : TCldrCollationItem;
AType : string
@@ -614,7 +781,68 @@ begin
if (xv.AsNodeSet.Count = 0) then
raise Exception.CreateFmt(sCollationTypeNotFound,[AType]);
ACollation.Clear();
- ParseCollationItem((TDOMNode(xv.AsNodeSet[0]) as TDOMElement),ACollation,TCldrParserMode.FullParsing);
+ ParseCollationItemXML((TDOMNode(xv.AsNodeSet[0]) as TDOMElement),ACollation,TCldrParserMode.FullParsing);
+ finally
+ xv.Free();
+ end
+end;
+
+procedure ParseCollationDocument2(
+ ADoc : TDOMDocument;
+ ACollation : TCldrCollation;
+ AMode : TCldrParserMode
+);
+var
+ n : TDOMNode;
+ collationsElement : TDOMElement;
+ i, c : Integer;
+ item : TCldrCollationItem;
+ nl : TDOMNodeList;
+begin
+ n := ADoc.DocumentElement.FindNode(s_COLLATIONS);
+ if (n = nil) then
+ raise Exception.Create(sCollationsNodeNotFound);
+ collationsElement := n as TDOMElement;
+ ACollation.Clear();
+ ACollation.Mode := AMode;
+ ACollation.Language := EvaluateXPathStr('identity/language/@type',ADoc.DocumentElement);
+ ACollation.Version := EvaluateXPathStr('identity/version/@number',ADoc.DocumentElement);
+ ACollation.DefaultType := EvaluateXPathStr('collations/defaultCollation',ADoc.DocumentElement);
+ if collationsElement.HasChildNodes() then begin
+ nl := collationsElement.ChildNodes;
+ c := nl.Count;
+ item := nil;
+ try
+ for i := 0 to c - 1 do begin
+ n := nl[i];
+ if (n.NodeName = s_COLLATION) then begin
+ item := TCldrCollationItem.Create();
+ ParseCollationItem2((n as TDOMElement),item,AMode);
+ ACollation.Add(item);
+ item := nil;
+ end
+ end;
+ except
+ FreeAndNil(item);
+ raise;
+ end;
+ end;
+end;
+
+procedure ParseCollationDocument2(
+ ADoc : TDOMDocument;
+ ACollation : TCldrCollationItem;
+ AType : string
+);
+var
+ xv : TXPathVariable;
+begin
+ xv := EvaluateXPathExpression(Format('collations/collation[@type=%s]',[QuotedStr(AType)]),ADoc.DocumentElement);
+ try
+ if (xv.AsNodeSet.Count = 0) then
+ raise Exception.CreateFmt(sCollationTypeNotFound,[AType]);
+ ACollation.Clear();
+ ParseCollationItem2((TDOMNode(xv.AsNodeSet[0]) as TDOMElement),ACollation,TCldrParserMode.FullParsing);
finally
xv.Free();
end
@@ -650,7 +878,40 @@ begin
end;
end;
-procedure ParseCollationDocument(
+procedure ParseCollationDocumentXML(
+ const AFileName : string;
+ ACollation : TCldrCollation;
+ AMode : TCldrParserMode
+);
+var
+ doc : TXMLDocument;
+begin
+ doc := ReadXMLFile(AFileName);
+ try
+ ParseCollationDocumentXML(doc,ACollation,AMode);
+ ACollation.LocalID := ExtractFileName(ChangeFileExt(AFileName,''));
+ finally
+ doc.Free();
+ end;
+end;
+
+procedure ParseCollationDocumentXML(
+ const AFileName : string;
+ ACollation : TCldrCollationItem;
+ AType : string
+);
+var
+ doc : TXMLDocument;
+begin
+ doc := ReadXMLFile(AFileName);
+ try
+ ParseCollationDocumentXML(doc,ACollation,AType);
+ finally
+ doc.Free();
+ end;
+end;
+
+procedure ParseCollationDocument2(
const AFileName : string;
ACollation : TCldrCollation;
AMode : TCldrParserMode
@@ -660,14 +921,30 @@ var
begin
doc := ReadXMLFile(AFileName);
try
- ParseCollationDocument(doc,ACollation,AMode);
+ ParseCollationDocument2(doc,ACollation,AMode);
ACollation.LocalID := ExtractFileName(ChangeFileExt(AFileName,''));
finally
doc.Free();
end;
end;
-procedure ParseCollationDocument(
+procedure ParseCollationDocument2(
+ AStream : TStream;
+ ACollation : TCldrCollation;
+ AMode : TCldrParserMode
+);
+var
+ doc : TXMLDocument;
+begin
+ doc := ReadXMLFile(AStream);
+ try
+ ParseCollationDocument2(doc,ACollation,AMode);
+ finally
+ doc.Free();
+ end;
+end;
+
+procedure ParseCollationDocument2(
const AFileName : string;
ACollation : TCldrCollationItem;
AType : string
@@ -677,10 +954,165 @@ var
begin
doc := ReadXMLFile(AFileName);
try
- ParseCollationDocument(doc,ACollation,AType);
+ ParseCollationDocument2(doc,ACollation,AType);
finally
doc.Free();
end;
end;
+procedure ParseCollationDocument2(
+ AStream : TStream;
+ ACollation : TCldrCollationItem;
+ AType : string
+);
+var
+ doc : TXMLDocument;
+begin
+ doc := ReadXMLFile(AStream);
+ try
+ ParseCollationDocument2(doc,ACollation,AType);
+ finally
+ doc.Free();
+ end;
+end;
+
+{ TCldrCollationStreamLoader }
+
+procedure TCldrCollationStreamLoader.CheckContent(ALanguage: string);
+begin
+ if not FileExists(ALanguage) then
+ raise EFOpenError.CreateFmt(SFOpenError,[ALanguage]);
+end;
+
+function TCldrCollationStreamLoader.IndexOf(ALanguage: string): Integer;
+var
+ i : Integer;
+begin
+ for i := Low(FLanguages) to High(FLanguages) do begin
+ if (FLanguages[i] = ALanguage) then begin
+ Result := i;
+ exit;
+ end;
+ end;
+ Result := -1;
+end;
+
+procedure TCldrCollationStreamLoader.LoadCollation(
+ const ALanguage : string;
+ ACollation : TCldrCollation;
+ AMode : TCldrParserMode
+);
+var
+ i : Integer;
+ locStream : TStream;
+begin
+ i := IndexOf(ALanguage);
+ if (i < 0) then
+ CheckContent(ALanguage);
+ locStream := FStreams[i];
+ locStream.Position := 0;
+ ParseCollationDocument2(locStream,ACollation,AMode);
+end;
+
+procedure TCldrCollationStreamLoader.LoadCollationType(
+ const ALanguage,
+ ATypeName : string;
+ AType : TCldrCollationItem
+);
+var
+ i : Integer;
+ locStream : TStream;
+begin
+ i := IndexOf(ALanguage);
+ if (i < 0) then
+ CheckContent(ALanguage);
+ locStream := FStreams[i];
+ locStream.Position := 0;
+ ParseCollationDocument2(locStream,AType,ATypeName);
+end;
+
+constructor TCldrCollationStreamLoader.Create(
+ const ALanguages : array of string;
+ const AStreams : array of TStream
+);
+var
+ c, i : Integer;
+begin
+ c := Length(ALanguages);
+ if (Length(AStreams) < c) then
+ c := Length(AStreams);
+ SetLength(FLanguages,c);
+ SetLength(FStreams,c);
+ for i := Low(ALanguages) to High(ALanguages) do begin
+ FLanguages[i] := ALanguages[i];
+ FStreams[i] := AStreams[i];
+ end;
+end;
+
+destructor TCldrCollationStreamLoader.Destroy();
+var
+ i : Integer;
+begin
+ for i := Low(FStreams) to High(FStreams) do
+ FreeAndNil(FStreams[i]);
+end;
+
+{ TCldrCollationFileLoader }
+
+procedure TCldrCollationFileLoader.SetPath(APath: string);
+var
+ s : string;
+begin
+ if (APath = '') then
+ s := ''
+ else
+ s := IncludeLeadingPathDelimiter(APath);
+ if (s <> FPath) then
+ FPath := s;
+end;
+
+function TCldrCollationFileLoader.BuildFileName(ALanguage: string): string;
+begin
+ Result := Format('%s%s.xml',[FPath,ALanguage]);
+end;
+
+procedure TCldrCollationFileLoader.CheckFile(AFileName: string);
+begin
+ if not FileExists(AFileName) then
+ raise EFOpenError.CreateFmt(SFOpenError,[AFileName]);
+end;
+
+procedure TCldrCollationFileLoader.LoadCollation(
+ const ALanguage : string;
+ ACollation : TCldrCollation;
+ AMode : TCldrParserMode
+);
+var
+ locFileName : string;
+begin
+ locFileName := BuildFileName(ALanguage);
+ CheckFile(locFileName);
+ ACollation.Clear();
+ ParseCollationDocument2(locFileName,ACollation,AMode);
+end;
+
+procedure TCldrCollationFileLoader.LoadCollationType(
+ const ALanguage,
+ ATypeName : string;
+ AType : TCldrCollationItem
+);
+var
+ locFileName : string;
+begin
+ locFileName := BuildFileName(ALanguage);
+ CheckFile(locFileName);
+ AType.Clear();
+ ParseCollationDocument2(locFileName,AType,ATypeName);
+end;
+
+constructor TCldrCollationFileLoader.Create(APath: string);
+begin
+ SetPath(APath);
+end;
+
end.
diff --git a/utils/unicode/grbtree.pas b/utils/unicode/grbtree.pas
index b493619a43..259fbad54b 100644
--- a/utils/unicode/grbtree.pas
+++ b/utils/unicode/grbtree.pas
@@ -419,8 +419,10 @@ begin
end;
// Stop if found
- if (cp.Compare(q^.Data,AData) = 0) then
+ if (cp.Compare(q^.Data,AData) = 0) then begin
+ Result := q;
break;
+ end;
last := dir;
dir := (cp.Compare(q^.Data,AData) < 0);
diff --git a/utils/unicode/helper.pas b/utils/unicode/helper.pas
index 1e10910f5d..b0a257af96 100644
--- a/utils/unicode/helper.pas
+++ b/utils/unicode/helper.pas
@@ -1,6 +1,6 @@
{ Unicode parser helper unit.
- Copyright (c) 2012 by Inoussa OUEDRAOGO
+ Copyright (c) 2012-2015 by Inoussa OUEDRAOGO
The source code is distributed under the Library GNU
General Public License with the following modification:
@@ -66,6 +66,7 @@ const
' but WITHOUT ANY WARRANTY; without even the implied warranty of ' + sLineBreak +
' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }';
+ WEIGHT_LEVEL_COUNT = 3;
type
// Unicode General Category
@@ -680,6 +681,14 @@ type
procedure ReverseBytes(var AData; const ALength : Integer);
procedure ReverseArray(var AValue; const AArrayLength, AItemSize : PtrInt);
+ function CalcMaxLevel2Value(ALines : array of TUCA_LineRec) : Cardinal;
+ procedure RewriteLevel2Values(ALines : PUCA_LineRec; ALength : Integer);
+ function RewriteLevel2(
+ const ALevel1Value : Cardinal;
+ ALines : PUCA_LineRec;
+ const ALinesLength : Integer
+ ) : Integer;
+
resourcestring
SInsufficientMemoryBuffer = 'Insufficient Memory Buffer';
@@ -1728,6 +1737,8 @@ var
Inc(actualPropLen);
end;
locData.PropID := k;
+ if (actualDataLen >= Length(ADataLineList)) then
+ SetLength(ADataLineList,(2*Length(ADataLineList)));
ADataLineList[actualDataLen] := locData;
Inc(actualDataLen);
end;
@@ -2477,7 +2488,7 @@ var
a := LowerCase(Trim(AToken));
b := LowerCase(Trim(NextToken()));
if (a <> b) then
- raise Exception.CreateFmt('Expected token "%s" but found "%s".',[a,b]);
+ raise Exception.CreateFmt('Expected token "%s" but found "%s", Line = "%s".',[a,b,line]);
end;
function ReadWeightBlock(var ADest : TUCA_WeightRec) : Boolean;
@@ -2498,7 +2509,7 @@ var
ADest.Variable := True;
end;
ADest.Weights[0] := StrToInt('$'+NextToken());
- for k := 1 to 3 do begin
+ for k := 1 to WEIGHT_LEVEL_COUNT-1 do begin
CheckToken('.');
ADest.Weights[k] := StrToInt('$'+NextToken());
end;
@@ -2638,8 +2649,11 @@ begin
exit(-1);
end;
-Procedure QuickSort(var AList: TUCA_DataBookIndex; L, R : Longint;
- ABook : PUCA_DataBook);
+procedure QuickSort(
+ var AList : TUCA_DataBookIndex;
+ L, R : Longint;
+ ABook : PUCA_DataBook
+);overload;
var
I, J : Longint;
P, Q : Integer;
@@ -4673,6 +4687,201 @@ begin
end;
end;
+Procedure QuickSort(AList : PCardinal; L, R : Longint);overload;
+var
+ I, J : Longint;
+ P, Q : Cardinal;
+begin
+ repeat
+ I := L;
+ J := R;
+ P := AList[ (L + R) div 2 ];
+ repeat
+ while (P > AList[i]) do
+ I := I + 1;
+ while (P < AList[J]) do
+ J := J - 1;
+ If I <= J then
+ begin
+ Q := AList[I];
+ AList[I] := AList[J];
+ AList[J] := Q;
+ I := I + 1;
+ J := J - 1;
+ end;
+ until I > J;
+ if J - L < R - I then
+ begin
+ if L < J then
+ QuickSort(AList, L, J);
+ L := I;
+ end
+ else
+ begin
+ if I < R then
+ QuickSort(AList, I, R);
+ R := J;
+ end;
+ until L >= R;
+end;
+
+function CalcMaxLevel2Count(
+ const ALevel1Value : Cardinal;
+ ALines : array of TUCA_LineRec
+) : Integer;
+var
+ i, c, k : Integer;
+ ac : Integer;
+ items : array of Cardinal;
+ p : PUCA_LineRec;
+ pw : ^TUCA_WeightRec;
+begin
+ c := Length(ALines);
+ if (c < 1) then
+ exit(0);
+ SetLength(items,0);
+ ac := 0;
+ p := @ALines[Low(ALines)];
+ for i := 0 to c-1 do begin
+ if (Length(p^.Weights) > 0) then begin
+ pw := @p^.Weights[Low(p^.Weights)];
+ for k := 0 to Length(p^.Weights)-1 do begin
+ if (pw^.Weights[0] = ALevel1Value) then begin
+ if (ac = 0) or (IndexDWord(items[0],ac,pw^.Weights[1]) < 0) then begin
+ if (ac >= Length(items)) then
+ SetLength(items,Length(items)+256);
+ items[ac] := pw^.Weights[1];
+ ac := ac+1;
+ end;
+ end;
+ Inc(pw);
+ end;
+ end;
+ Inc(p);
+ end;
+ Result := ac;
+end;
+
+function RewriteLevel2(
+ const ALevel1Value : Cardinal;
+ ALines : PUCA_LineRec;
+ const ALinesLength : Integer
+) : Integer;
+var
+ i, c, k : Integer;
+ ac : Integer;
+ items : array of Cardinal;
+ p : PUCA_LineRec;
+ pw : ^TUCA_WeightRec;
+ newValue : Cardinal;
+begin
+ c := ALinesLength;
+ if (c < 1) then
+ exit(0);
+ SetLength(items,256);
+ ac := 0;
+ p := ALines;
+ for i := 0 to c-1 do begin
+ if (Length(p^.Weights) > 0) then begin
+ for k := 0 to Length(p^.Weights)-1 do begin
+ pw := @p^.Weights[k];
+ if (pw^.Weights[0] = ALevel1Value) then begin
+ if (ac = 0) or (IndexDWord(items[0],ac,pw^.Weights[1]) < 0) then begin
+ if (ac >= Length(items)) then
+ SetLength(items,Length(items)+256);
+ items[ac] := pw^.Weights[1];
+ ac := ac+1;
+ end;
+ end;
+ end;
+ end;
+ Inc(p);
+ end;
+ SetLength(items,ac);
+ if (ac > 1) then
+ QuickSort(@items[0],0,(ac-1));
+
+ p := ALines;
+ for i := 0 to c-1 do begin
+ if (Length(p^.Weights) > 0) then begin
+ for k := 0 to Length(p^.Weights)-1 do begin
+ pw := @p^.Weights[k];
+ if (pw^.Weights[0] = ALevel1Value) then begin
+ newValue := IndexDWord(items[0],ac,pw^.Weights[1]);
+ if (newValue < 0) then
+ raise Exception.CreateFmt('level 2 value %d missed in rewrite of level 1 value of %d.',[pw^.Weights[1],ALevel1Value]);
+ pw^.Weights[1] := newValue;//+1;
+ end;
+ end;
+ end;
+ Inc(p);
+ end;
+ if (Length(items) > 0) then
+ Result := items[Length(items)-1]
+ else
+ Result := 0;
+end;
+
+procedure RewriteLevel2Values(ALines : PUCA_LineRec; ALength : Integer);
+var
+ c, i, ac, k : Integer;
+ p : PUCA_LineRec;
+ level1List : array of Cardinal;
+ pw : ^TUCA_WeightRec;
+begin
+ c := ALength;
+ if (c < 1) then
+ exit;
+ ac := 0;
+ SetLength(level1List,c);
+ p := ALines;
+ for i := 0 to c-1 do begin
+ if (Length(p^.Weights) > 0) then begin
+ for k := 0 to Length(p^.Weights)-1 do begin
+ pw := @p^.Weights[k];
+ if (ac = 0) or (IndexDWord(level1List[0],ac,pw^.Weights[0]) < 0) then begin
+ if (ac >= Length(level1List)) then
+ SetLength(level1List,ac+1000);
+ level1List[ac] := pw^.Weights[0];
+ RewriteLevel2(level1List[ac],ALines,ALength);
+ ac := ac+1;
+ end;
+ end;
+ end;
+ Inc(p);
+ end;
+end;
+
+function CalcMaxLevel2Value(ALines : array of TUCA_LineRec) : Cardinal;
+var
+ i, c, k, tempValue : Integer;
+ p : PUCA_LineRec;
+ maxLevel : Cardinal;
+ maxValue : Integer;
+begin
+ c := Length(ALines);
+ if (c < 2) then
+ exit(0);
+ maxLevel := 0;
+ maxValue := CalcMaxLevel2Count(maxLevel,ALines);
+ p := @ALines[Low(ALines)+1];
+ for i := 1 to c-1 do begin
+ if (Length(p^.Weights) > 0) then begin
+ for k := 0 to Length(p^.Weights)-1 do begin
+ if (p^.Weights[k].Weights[0] <> maxLevel) then begin
+ tempValue := CalcMaxLevel2Count(p^.Weights[k].Weights[0],ALines);
+ if (tempValue > maxValue) then begin
+ maxLevel := p^.Weights[k].Weights[0];
+ maxValue := tempValue;
+ end;
+ end;
+ end;
+ end;
+ Inc(p);
+ end;
+ Result := maxValue;
+end;
+
initialization
FS := DefaultFormatSettings;
FS.DecimalSeparator := '.';
diff --git a/utils/unicode/unicodeset.pas b/utils/unicode/unicodeset.pas
index 0be30432f4..974e8748e5 100644
--- a/utils/unicode/unicodeset.pas
+++ b/utils/unicode/unicodeset.pas
@@ -1,6 +1,6 @@
{ UnicodeSet implementation.
- Copyright (c) 2013 by Inoussa OUEDRAOGO
+ Copyright (c) 2013-2015 by Inoussa OUEDRAOGO
The source code is distributed under the Library GNU
General Public License with the following modification:
@@ -36,6 +36,8 @@ type
TUnicodeSet = class;
+ { TPatternParser }
+
TPatternParser = class
private
FBufferStr : UnicodeString;
@@ -43,6 +45,7 @@ type
FBufferLength : Integer;
FSet : TUnicodeSet;
FPosition : Integer;
+ FSpecialChar: Boolean;
private
procedure Error(const AMsg : string; const AArgs : array of const);overload;inline;
procedure Error(const AMsg : string);overload;inline;
@@ -58,6 +61,7 @@ type
function NextChar() : TUnicodeCodePoint;
procedure ParseItem();
procedure DoParse();
+ property SpecialChar : Boolean read FSpecialChar;
public
procedure Parse(const APattern : PUnicodeChar; const ALength : Integer);overload;
procedure Parse(const APattern : UnicodeString);overload;inline;
@@ -73,6 +77,8 @@ type
class function Compare(const A, B : TUnicodeCodePointArray) : Integer;static;inline;
end;
+ { TUnicodeSet }
+
TUnicodeSet = class
private type
TItem = TUnicodeCodePointArray;
@@ -84,18 +90,22 @@ type
FParser : TPatternParser;
private
procedure CreateParser();inline;
+ function InternalContains(const AString : UnicodeString) : Boolean;overload;
public
constructor Create();
destructor Destroy;override;
procedure Add(AChar : TUnicodeCodePoint);inline;overload;
procedure Add(AString : TUnicodeCodePointArray);inline;overload;
procedure AddRange(const AStart, AEnd : TUnicodeCodePoint);inline;
- procedure AddPattern(const APattern : UnicodeString);inline;
+ procedure AddPattern(const APattern : UnicodeString);inline;overload;
+ procedure AddPattern(const APattern : RawByteString);inline;overload;
function CreateIterator() : TIterator;
function Contains(const AString : array of TUnicodeCodePoint) : Boolean;overload;
function Contains(const AChar : TUnicodeCodePoint) : Boolean;inline;overload;
function Contains(const AChar : UnicodeChar) : Boolean;inline;overload;
function Contains(const AChar : AnsiChar) : Boolean;inline;overload;
+ function Contains(const AString : UnicodeString) : Boolean;overload;
+ function Contains(const AString : RawByteString) : Boolean;overload;
end;
resourcestring
@@ -267,7 +277,7 @@ begin
CheckEOF(4);
s := Copy(FBufferStr,(FPosition+1),4);
Inc(FPosition,4);
- if not TryStrToInt('$'+s,i) then
+ if not TryStrToInt(string('$'+s),i) then
Error(SExpectedBut,['\uXXXX',s]);
cp := i;
end;
@@ -281,6 +291,7 @@ begin
Inc(FPosition);
end;
end;
+ FSpecialChar := (cp = Ord('{')) or (cp = Ord('}'));
Result := cp;
end;
@@ -292,7 +303,8 @@ end;
procedure TPatternParser.ParseItem();
var
cp, lastCp : TUnicodeCodePoint;
- charCount : Integer;
+ charCount, k : Integer;
+ cpa : TUnicodeCodePointArray;
begin
SkipSpaces();
Expect('[');
@@ -304,13 +316,31 @@ begin
cp := NextChar();
if CompareTo(cp,']') then
Break;
- if CompareTo(cp,'-') then begin
- if (charCount = 0) then
- Error(SExpectedBut,['<char>','-']);
- cp := NextChar();
- FSet.AddRange(lastCp,cp);
+ if SpecialChar and (cp = Ord('{')) then begin
+ SetLength(cpa,12);
+ k := 0;
+ while True do begin
+ cp := NextChar();
+ if SpecialChar and (cp = Ord('}')) then
+ break;
+ if (k >= Length(cpa)) then
+ SetLength(cpa,(2*k));
+ cpa[k] := cp;
+ k := k+1;
+ end;
+ if (k > 0) then begin
+ SetLength(cpa,k);
+ FSet.Add(cpa);
+ end;
end else begin
- FSet.Add(cp);
+ if CompareTo(cp,'-') then begin
+ if (charCount = 0) then
+ Error(SExpectedBut,['<char>','-']);
+ cp := NextChar();
+ FSet.AddRange(lastCp,cp);
+ end else begin
+ FSet.Add(cp);
+ end;
end;
Inc(charCount);
end;
@@ -348,6 +378,22 @@ begin
end;
end;
+function TUnicodeSet.InternalContains(const AString: UnicodeString): Boolean;
+var
+ u4 : UCS4String;
+ c, i : Integer;
+ cpa : TUnicodeCodePointArray;
+begin
+ u4 := UnicodeStringToUCS4String(AString);
+ c := Length(u4)-1;
+ if (c = 1) then
+ exit(Contains(u4[0]));
+ SetLength(cpa,c);
+ for i := 0 to c-1 do
+ cpa[i] := u4[i];
+ Result := Contains(cpa);
+end;
+
constructor TUnicodeSet.Create;
begin
FTree := TTree.Create();
@@ -387,6 +433,14 @@ begin
FParser.Parse(APattern);
end;
+procedure TUnicodeSet.AddPattern(const APattern: RawByteString);
+var
+ us : UnicodeString;
+begin
+ us := UnicodeString(APattern);
+ AddPattern(us);
+end;
+
function TUnicodeSet.CreateIterator() : TIterator;
begin
Result := FTree.CreateForwardIterator();
@@ -422,5 +476,22 @@ begin
Result := Contains(TUnicodeCodePoint(Ord(AChar)));
end;
+function TUnicodeSet.Contains(const AString: UnicodeString): Boolean;
+begin
+ if (AString = '') then
+ exit(Contains([]));
+ if (Length(AString) = 1) then
+ exit(Contains(AString[1]));
+ Result := InternalContains(AString);
+end;
+
+function TUnicodeSet.Contains(const AString: RawByteString): Boolean;
+var
+ us : UnicodeString;
+begin
+ us := UnicodeString(AString);
+ Result := Contains(us);
+end;
+
end.