diff options
author | marco <marco@3ad0048d-3df7-0310-abae-a5850022a9f2> | 2016-05-24 20:05:14 +0000 |
---|---|---|
committer | marco <marco@3ad0048d-3df7-0310-abae-a5850022a9f2> | 2016-05-24 20:05:14 +0000 |
commit | 683dae6a0e5c2a0bd0169501cfddcb2af1223bb5 (patch) | |
tree | 42dea2789ef188994b4f04f8bec5d540b653c05e /utils | |
parent | 86fa73d7722e895417c9e99cd6dab852c30e48a6 (diff) | |
download | fpc-683dae6a0e5c2a0bd0169501cfddcb2af1223bb5.tar.gz |
# revisions: 33613,33629,33632,33708,33710,33728,33729,33730,33734,33742
git-svn-id: http://svn.freepascal.org/svn/fpc/branches/fixes_3_0@33786 3ad0048d-3df7-0310-abae-a5850022a9f2
Diffstat (limited to 'utils')
-rw-r--r-- | utils/unicode/cldrhelper.pas | 631 | ||||
-rw-r--r-- | utils/unicode/cldrparser.lpr | 99 | ||||
-rw-r--r-- | utils/unicode/cldrtest.pas | 2283 | ||||
-rw-r--r-- | utils/unicode/cldrtxt.pas | 687 | ||||
-rw-r--r-- | utils/unicode/cldrxml.pas | 538 | ||||
-rw-r--r-- | utils/unicode/grbtree.pas | 4 | ||||
-rw-r--r-- | utils/unicode/helper.pas | 219 | ||||
-rw-r--r-- | utils/unicode/unicodeset.pas | 91 |
8 files changed, 4403 insertions, 149 deletions
diff --git a/utils/unicode/cldrhelper.pas b/utils/unicode/cldrhelper.pas index a1495244f5..1ae155cba4 100644 --- a/utils/unicode/cldrhelper.pas +++ b/utils/unicode/cldrhelper.pas @@ -1,6 +1,6 @@ { CLDR collation helper unit. - Copyright (c) 2013 by Inoussa OUEDRAOGO + Copyright (c) 2013-2015 by Inoussa OUEDRAOGO The source code is distributed under the Library GNU General Public License with the following modification: @@ -47,9 +47,6 @@ type TUCA_LineRecArray = array of TUCA_LineRec; - -//---------------------------------------------------- - ECldrException = class(Exception) end; @@ -66,12 +63,23 @@ type FirstNonIgnorable, LastNonIgnorable, FirstTrailing, LastTrailing ); +const + FixableReorderLogicalSet = [ + TReorderLogicalReset.LastRegular,TReorderLogicalReset.FirstTrailing, + TReorderLogicalReset.LastTrailing + ]; + +type TCollationField = (BackWard, VariableLowLimit, VariableHighLimit); TCollationFields = set of TCollationField; + PReorderUnit = ^TReorderUnit; + { TReorderUnit } TReorderUnit = X_PACKED record + private + FVirtualPosition : TReorderLogicalReset; public Context : TUnicodeCodePointArray; ExpansionChars : TUnicodeCodePointArray; @@ -80,6 +88,9 @@ type InitialPosition : Integer; Changed : Boolean; public + property VirtualPosition : TReorderLogicalReset read FVirtualPosition; + function IsVirtual() : Boolean;inline; + public class function From( const AChars, AContext : array of TUnicodeCodePoint; @@ -102,15 +113,19 @@ type const AWeigthKind : TReorderWeigthKind; const AInitialPosition : Integer ) : TReorderUnit;static;overload; + class function From( + const AReset : TReorderLogicalReset + ) : TReorderUnit;static;overload; procedure SetExpansion(const AChars : array of TUnicodeCodePoint); procedure SetExpansion(const AChar : TUnicodeCodePoint); procedure Clear(); - procedure Assign(const AItem : TReorderUnit); + procedure Assign(const AItem : PReorderUnit); function HasContext() : Boolean; function IsExpansion() : Boolean; end; - PReorderUnit = ^TReorderUnit; + + PReorderSequence = ^TReorderSequence; { TReorderSequence } @@ -122,8 +137,9 @@ type Before : Boolean; public procedure Clear(); + procedure SetElementCount(const ALength : Integer); + procedure Assign(ASource : PReorderSequence); end; - PReorderSequence = ^TReorderSequence; TReorderSequenceArray = array of TReorderSequence; { TOrderedCharacters } @@ -150,28 +166,68 @@ type end; POrderedCharacters = ^TOrderedCharacters; + { TCldrImport } + + TCldrImport = class + private + FSource: string; + FTypeName: string; + public + property Source : string read FSource; + property TypeName : string read FTypeName; + end; + + { TCldrImportList } + + TCldrImportList = class + private + FItems : array of TCldrImport; + private + function GetCount: Integer; + function GetItem(AIndex : Integer): TCldrImport; + public + destructor Destroy();override; + procedure Clear(); + function IndexOf(const ASource, AType : string) : Integer; + function Find(const ASource, AType : string) : TCldrImport; + function Add(const ASource, AType : string) : TCldrImport; + property Count : Integer read GetCount; + property Item[AIndex : Integer] : TCldrImport read GetItem;default; + end; + TCldrCollation = class; { TCldrCollationItem } TCldrCollationItem = class private + FAlt: string; FBackwards: Boolean; FBase: string; FChangedFields: TCollationFields; + FImports: TCldrImportList; FParent: TCldrCollation; FRules: TReorderSequenceArray; FTypeName: string; public + constructor Create(); + destructor Destroy;override; procedure Clear(); + function IsPrivate() : Boolean; property Parent : TCldrCollation read FParent; property TypeName : string read FTypeName write FTypeName; + property Alt : string read FAlt write FAlt; property Base : string read FBase write FBase; property Backwards : Boolean read FBackwards write FBackwards; property Rules : TReorderSequenceArray read FRules write FRules; property ChangedFields : TCollationFields read FChangedFields write FChangedFields; + property Imports : TCldrImportList read FImports; end; + TCldrParserMode = (HeaderParsing, FullParsing); + + TCldrCollationRepository = class; + { TCldrCollation } TCldrCollation = class @@ -181,24 +237,81 @@ type FDefaultType: string; FVersion: string; FLanguage: string; + FMode: TCldrParserMode; + FRepository: TCldrCollationRepository; private function GetItem(Index : Integer): TCldrCollationItem; function GetItemCount: Integer; public destructor Destroy();override; procedure Clear(); - function IndexOf(const AItemName : string) : Integer; - function Find(const AItemName : string) : TCldrCollationItem; + function IndexOf(const AItemName : string) : Integer;overload; + function IndexOf(const AItemName, AItemAlt : string) : Integer;overload; + function Find(const AItemName : string) : TCldrCollationItem;overload; + function Find(const AItemName, AItemAlt : string) : TCldrCollationItem;overload; function Add(AItem : TCldrCollationItem) : Integer; + function FindPublicItemCount() : Integer; property Language : string read FLanguage write FLanguage; property LocalID : string read FLocalID write FLocalID; property Version : string read FVersion write FVersion; property DefaultType : string read FDefaultType write FDefaultType; property ItemCount : Integer read GetItemCount; property Items[Index : Integer] : TCldrCollationItem read GetItem; + property Mode : TCldrParserMode read FMode write FMode; + property Repository : TCldrCollationRepository read FRepository; end; - TCldrParserMode = (HeaderParsing, FullParsing); + ICldrCollationLoader = interface + ['{117AAC84-06CE-4EC8-9B07-4E81EC23930C}'] + procedure LoadCollation( + const ALanguage : string; + ACollation : TCldrCollation; + AMode : TCldrParserMode + ); + procedure LoadCollationType( + const ALanguage, + ATypeName : string; + AType : TCldrCollationItem + ); + end; + + { TCldrCollationRepository } + + TCldrCollationRepository = class + private + FItems : array of TCldrCollation; + FLoader: ICldrCollationLoader; + private + function GetItem(const AIndex : Integer): TCldrCollation; + function GetItemCount: Integer; + function IndexOfItem(AItem : TCldrCollation) : Integer; + procedure Add(AItem : TCldrCollation); + public + constructor Create(ALoader : ICldrCollationLoader); + destructor Destroy;override; + procedure FreeItems(); + procedure Clear(); + procedure SetLoader(AValue : ICldrCollationLoader); + function IndexOf(const ALanguage : string) : Integer; + function Find(const ALanguage : string) : TCldrCollation; + function Load(const ALanguage : string; const AMode : TCldrParserMode) : TCldrCollation; + property ItemCount : Integer read GetItemCount; + property Items[const AIndex : Integer] : TCldrCollation read GetItem; + property Loader : ICldrCollationLoader read FLoader; + end; + + TRuleVisiterFunction = + function( + ARule : PReorderSequence; + AOwner : TCldrCollationItem; + AData : Pointer + ) : Boolean; + + function ForEachRule( + ACollationType : TCldrCollationItem; + AVisitFunc : TRuleVisiterFunction; + ACustomData : Pointer + ) : Boolean; function ComputeWeigths( const AData : PReorderUnit; @@ -240,10 +353,47 @@ type const ASequenceLength : Integer ) : Integer; + function TryStrToLogicalReorder( + const AValue : string; + out AResult : TReorderLogicalReset + ) : Boolean; + + +resourcestring + sCaseNothandled = 'This case is not handled : "%s", Position = %d.'; + sCodePointExpected = 'Code Point node expected as child at this position "%d".'; + sCollationsExistsAlready = 'This collation already exists : "%s"'; + sCollationsNodeNotFound = '"collations" node not found.'; + sCollationTypeNotFound = 'collation "Type" not found : "%s".'; + sHexAttributeExpected = '"hex" attribute expected at this position "%d".'; + sInvalidResetClause = 'Invalid "Reset" clause.'; + sNodeNameAssertMessage = 'Expected NodeName "%s", got "%s".'; + sRulesNodeNotFound = '"rules" node not found.'; + sTextNodeChildExpected = '(Child) text node expected at this position "%d", but got "%s".'; + sUniqueChildNodeExpected = 'Unique child node expected at this position "%d".'; + sUnknownResetLogicalPosition = 'Unknown reset logical position : "%s".'; + sVirtualIsReadOnly = 'Virtual logical "Reset" items are read only.'; + implementation uses RtlConsts, typinfo; +function TryStrToLogicalReorder( + const AValue : string; + out AResult : TReorderLogicalReset +) : Boolean; +var + s : string; + i : Integer; +begin + s := StringReplace(AValue,' ','',[rfReplaceAll]); + s := StringReplace(s,'_','',[rfReplaceAll]); + i := GetEnumValue(TypeInfo(TReorderLogicalReset),s); + Result := (i > -1); + if Result then + AResult := TReorderLogicalReset(i); +end; + function ToStr(const ACharacters : array of TUnicodeCodePoint): string; var i : Integer; @@ -457,6 +607,11 @@ begin end; if (APosition = TReorderLogicalReset.LastNonIgnorable) then exit(c); + for i := 0 to c - 1 do begin + if (p^.VirtualPosition = APosition) then + exit(i); + Inc(p); + end; end; procedure ApplyStatementToSequence( @@ -491,6 +646,12 @@ var end else begin if (kr = 0) then exit(0); + pd := @ASequence.Data[kr]; + if pd^.IsVirtual() and (pd^.VirtualPosition in FixableReorderLogicalSet) then begin + kr := kr-1; + if (kr = 0) then + exit; + end; kk := kr; pd := @ASequence.Data[kk]; if (pd^.WeigthKind = TReorderWeigthKind.Primary) then begin @@ -518,8 +679,8 @@ begin pst := AStatement; for h := 0 to AStatementCount - 1 do begin locResetPos := -1; - if (AStatement^.LogicalPosition > TReorderLogicalReset.None) then - locResetPos := FindLogicalPos(@ASequence,AStatement^.LogicalPosition) + if (pst^.LogicalPosition > TReorderLogicalReset.None) then + locResetPos := FindLogicalPos(@ASequence,pst^.LogicalPosition) else if (Length(pst^.Reset) > 0) then begin locResetPos := IndexOf(pst^.Reset,[],@ASequence.Data[0],ASequence.ActualLength); {if (locResetPos = -1) then @@ -527,6 +688,15 @@ begin if (locResetPos = -1) then locResetPos := ASequence.ActualLength; end; + if (pst^.LogicalPosition in FixableReorderLogicalSet) then begin + if (locResetPos < 0) or + (locResetPos >= ASequence.ActualLength) or + not(ASequence.Data[locResetPos].VirtualPosition in FixableReorderLogicalSet) + then begin + locResetPos := ASequence.Append(TReorderUnit.From(pst^.LogicalPosition)); + end; + end; + pse := @pst^.Elements[0]; kr := locResetPos; k := GetNextInsertPos(); @@ -718,6 +888,47 @@ begin end; end; +function IsIgnorable(AWeight : TUCA_WeightRecArray) : Boolean; +var + i : Integer; +begin + if (Length(AWeight) = 0) then + exit(True); + for i := Low(AWeight) to High(AWeight) do begin + if (AWeight[i].Weights[0] <> 0) or + (AWeight[i].Weights[1] <> 0) or + (AWeight[i].Weights[2] <> 0) + then begin + exit(False); + end; + end; + Result := True; +end; + +function RemoveIgnorables( + AItem : TUnicodeCodePointArray; + const AList : PUCA_LineRec; + const AListLen : Integer +) : TUnicodeCodePointArray; +var + i, c, k : Integer; +begin + SetLength(Result,Length(AItem)); + c := 0; + for i := 0 to Length(AItem) - 1 do begin + k := IndexOf([AItem[i]],AList,AListLen); + if (k >= 0) and + IsIgnorable(AList[k].Weights) + then + k := -1; + if (k >= 0) then begin + Result[c] := AItem[i]; + c := c+1; + end; + end; + SetLength(Result,c); +end; + function Compress( const AData : TUCA_LineRecArray; out AResult : TUCA_LineRecArray @@ -806,7 +1017,7 @@ begin q := AData; p := AData; for i := 0 to ADataLen - 1 do begin - if p^.Changed then begin + if not(p^.IsVirtual()) and p^.Changed then begin suffixChar := p^.Characters[0]; for k := 0 to ADataLen - 1 do begin if not(q[k].Changed) and (q[k].Characters[0] = suffixChar) then begin @@ -821,7 +1032,7 @@ end; {$include weight_derivation.inc} -function ComputeWeigths( +function InternalComputeWeigths( const AData : PReorderUnit; const ADataLen : Integer; const ADataWeigths : TUCA_LineRecArray; @@ -1116,6 +1327,41 @@ begin Result := Length(AResult); end; +function ComputeWeigths( + const AData : PReorderUnit; + const ADataLen : Integer; + const ADataWeigths : TUCA_LineRecArray; + out AResult : TUCA_LineRecArray +) : Integer; +var + locData : array of TReorderUnit; + i, actualLength : Integer; + p : PReorderUnit; +begin + SetLength(locData,ADataLen); + actualLength := 0; + p := AData; + for i := 0 to ADataLen-1 do begin + if not p^.IsVirtual() then begin + locData[actualLength].Assign(p); + actualLength := actualLength+1; + end; + Inc(p); + end; + if (Length(locData) <> actualLength) then + SetLength(locData,actualLength); + Result := InternalComputeWeigths(@locData[0],actualLength,ADataWeigths,AResult); + + p := AData; + for i := 0 to actualLength-1 do begin + while p^.IsVirtual() do begin + Inc(p); + end; + p^.Assign(@locData[i]); + Inc(p); + end; +end; + function FillInitialPositions( AData : PReorderUnit; const ADataLen : Integer; @@ -1125,6 +1371,7 @@ var locNotFound, i, cw : Integer; p : PReorderUnit; pw : PUCA_LineRec; + chars : TUnicodeCodePointArray; begin locNotFound := 0; cw := Length(ADataWeigths); @@ -1135,6 +1382,10 @@ begin p := AData; for i := 0 to ADataLen - 1 do begin p^.InitialPosition := IndexOf(p^.Characters,pw,cw) + 1; + if (p^.InitialPosition = 0) then begin + chars := RemoveIgnorables(p^.Characters,pw,cw); + p^.InitialPosition := IndexOf(chars,pw,cw) + 1; + end; if (p^.InitialPosition = 0) then Inc(locNotFound); Inc(p); @@ -1142,8 +1393,212 @@ begin Result := locNotFound; end; +{ TCldrImportList } + +function TCldrImportList.GetCount: Integer; +begin + Result := Length(FItems); +end; + +function TCldrImportList.GetItem(AIndex : Integer): TCldrImport; +begin + if (AIndex < 0) or (AIndex >= Length(FItems)) then + raise ERangeError.CreateFmt(SListIndexError,[AIndex]); + Result := FItems[AIndex]; +end; + +destructor TCldrImportList.Destroy(); +begin + Clear(); + inherited; +end; + +procedure TCldrImportList.Clear(); +var + i : Integer; +begin + for i := Low(FItems) to High(FItems) do + FreeAndNil(FItems[i]); + SetLength(FItems,0); +end; + +function TCldrImportList.IndexOf(const ASource, AType: string): Integer; +var + i : Integer; +begin + for i := Low(FItems) to High(FItems) do begin + if (FItems[i].Source = ASource) and (FItems[i].TypeName = AType) then begin + Result := i; + exit; + end; + end; + Result := -1; +end; + +function TCldrImportList.Find(const ASource, AType: string): TCldrImport; +var + i : Integer; +begin + i := IndexOf(ASource,AType); + if (i >= 0) then + Result := FItems[i] + else + Result := nil; +end; + +function TCldrImportList.Add(const ASource, AType: string): TCldrImport; +var + i : Integer; +begin + i := IndexOf(ASource,AType); + if (i >= 0) then begin + Result := FItems[i]; + end else begin + Result := TCldrImport.Create(); + Result.FSource := ASource; + Result.FTypeName := AType; + i := Length(FItems); + SetLength(FItems,(i+1)); + FItems[i] := Result; + end; +end; + +{ TCldrCollationRepository } + +function TCldrCollationRepository.GetItem(const AIndex : Integer): TCldrCollation; +begin + if (AIndex < 0) or (AIndex >= Length(FItems)) then + raise ERangeError.CreateFmt(SListIndexError,[AIndex]); + Result := FItems[AIndex]; +end; + +function TCldrCollationRepository.GetItemCount: Integer; +begin + Result := Length(FItems); +end; + +function TCldrCollationRepository.IndexOfItem(AItem: TCldrCollation): Integer; +var + i : Integer; +begin + for i := Low(FItems) to High(FItems) do begin + if (FItems[i] = AItem) then begin + Result := i; + exit; + end; + end; + Result := -1; +end; + +procedure TCldrCollationRepository.Add(AItem: TCldrCollation); +var + i : Integer; +begin + if (AItem = nil) then + raise EArgumentException.CreateFmt(SParamIsNil,['AItem: TCldrCollation']); + if (IndexOfItem(AItem) >= 0) then + raise EArgumentException.CreateFmt(sCollationsExistsAlready,[AItem.Language]); + i := Length(FItems); + SetLength(FItems,(i+1)); + AItem.FRepository := Self; + FItems[i] := AItem; +end; + +constructor TCldrCollationRepository.Create(ALoader: ICldrCollationLoader); +begin + if (ALoader = nil) then + raise EArgumentException.CreateFmt(SInvalidPropertyElement,['Loader']); + SetLoader(ALoader); +end; + +destructor TCldrCollationRepository.Destroy; +begin + Clear(); + inherited Destroy; +end; + +procedure TCldrCollationRepository.FreeItems(); +var + i : Integer; +begin + for i := 0 to Length(FItems) - 1 do + FreeAndNil(FItems[i]); + SetLength(FItems,0); +end; + +procedure TCldrCollationRepository.Clear(); +begin + FreeItems(); +end; + +procedure TCldrCollationRepository.SetLoader(AValue: ICldrCollationLoader); +begin + if (FLoader <> AValue) then + FLoader := AValue; +end; + +function TCldrCollationRepository.IndexOf(const ALanguage: string): Integer; +var + i : Integer; +begin + for i := Low(FItems) to High(FItems) do begin + if (FItems[i].Language = ALanguage) then begin + Result := i; + exit; + end + end; + Result := -1; +end; + +function TCldrCollationRepository.Find(const ALanguage: string): TCldrCollation; +var + i : Integer; +begin + i := IndexOf(ALanguage); + if (i >= 0) then + Result := FItems[i] + else + Result := nil; +end; + +function TCldrCollationRepository.Load( + const ALanguage : string; + const AMode : TCldrParserMode +) : TCldrCollation; +var + isnew : Boolean; +begin + Result := Find(ALanguage); + if (Result <> nil) then begin + if (Result.Mode = TCldrParserMode.FullParsing) or (Result.Mode = AMode) then + exit; + end; + isnew := (Result = nil); + if isnew then + Result := TCldrCollation.Create(); + try + Loader.LoadCollation(ALanguage,Result,AMode); + Add(Result); + except + if isnew then + FreeAndNil(Result); + raise; + end; +end; + { TCldrCollationItem } +constructor TCldrCollationItem.Create; +begin + FImports := TCldrImportList.Create(); +end; + +destructor TCldrCollationItem.Destroy; +begin + FImports.Free(); + inherited Destroy; +end; + procedure TCldrCollationItem.Clear(); begin FBackwards := False; @@ -1151,6 +1606,12 @@ begin FChangedFields := []; SetLength(FRules,0); FTypeName := ''; + FImports.Clear(); +end; + +function TCldrCollationItem.IsPrivate() : Boolean; +begin + Result := (Pos('private-',TypeName) = 1); end; { TCldrCollation } @@ -1195,6 +1656,20 @@ begin Result := -1; end; +function TCldrCollation.IndexOf(const AItemName, AItemAlt: string): Integer; +var + i : Integer; +begin + for i := 0 to ItemCount - 1 do begin + if SameText(AItemName,Items[i].TypeName) and + SameText(AItemAlt,Items[i].Alt) + then begin + exit(i); + end; + end; + Result := -1; +end; + function TCldrCollation.Find(const AItemName: string): TCldrCollationItem; var i : Integer; @@ -1206,6 +1681,17 @@ begin Result := Items[i]; end; +function TCldrCollation.Find(const AItemName, AItemAlt: string): TCldrCollationItem; +var + i : Integer; +begin + i := IndexOf(AItemName,AItemAlt); + if (i = - 1) then + Result := nil + else + Result := Items[i]; +end; + function TCldrCollation.Add(AItem: TCldrCollationItem): Integer; begin Result := Length(FItems); @@ -1214,6 +1700,18 @@ begin AItem.FParent := Self; end; +function TCldrCollation.FindPublicItemCount() : Integer; +var + r, i : Integer; +begin + r := 0; + for i := 0 to ItemCount-1 do begin + if not Items[i].IsPrivate() then + r := r+1; + end; + Result := r; +end; + { TReorderSequence } procedure TReorderSequence.Clear(); @@ -1224,8 +1722,34 @@ begin Before := False; end; +procedure TReorderSequence.SetElementCount(const ALength: Integer); +begin + SetLength(Elements,ALength); +end; + +procedure TReorderSequence.Assign(ASource: PReorderSequence); +var + c, i : Integer; +begin + if (ASource = nil) then begin + Self.Clear(); + exit; + end; + Self.Reset := Copy(ASource^.Reset); + c := Length(ASource^.Elements); + SetLength(Self.Elements,c); + for i := 0 to c-1 do + Self.Elements[i].Assign(@ASource^.Elements[i]); + Self.Before := ASource^.Before; +end; + { TReorderUnit } +function TReorderUnit.IsVirtual() : Boolean; +begin + Result := (FVirtualPosition > TReorderLogicalReset.None); +end; + class function TReorderUnit.From( const AChars, AContext : array of TUnicodeCodePoint; @@ -1235,6 +1759,7 @@ class function TReorderUnit.From( var c : Integer; begin + Result.Clear(); c := Length(AChars); SetLength(Result.Characters,c); if (c > 0) then @@ -1276,10 +1801,19 @@ begin Result := From([AChar],AContext,AWeigthKind,AInitialPosition); end; +class function TReorderUnit.From(const AReset: TReorderLogicalReset): TReorderUnit; +begin + Result.Clear(); + Result.FVirtualPosition := AReset; +end; + procedure TReorderUnit.SetExpansion(const AChars: array of TUnicodeCodePoint); var c : Integer; begin + if IsVirtual() then + raise ECldrException.Create(sVirtualIsReadOnly); + c := Length(AChars); SetLength(ExpansionChars,c); if (c > 0) then @@ -1288,11 +1822,15 @@ end; procedure TReorderUnit.SetExpansion(const AChar: TUnicodeCodePoint); begin + if IsVirtual() then + raise ECldrException.Create(sVirtualIsReadOnly); + SetExpansion([AChar]); end; procedure TReorderUnit.Clear(); begin + Self.FVirtualPosition := TReorderLogicalReset(0); Self.Characters := nil; Self.Context := nil; Self.ExpansionChars := nil; @@ -1301,16 +1839,19 @@ begin Self.Changed := False; end; -procedure TReorderUnit.Assign(const AItem : TReorderUnit); +procedure TReorderUnit.Assign(const AItem : PReorderUnit); begin Clear(); - Self.Characters := Copy(AItem.Characters); - //SetLength(Self.Context,Length(AItem.Context)); - Self.Context := Copy(AItem.Context); - Self.ExpansionChars := Copy(AItem.ExpansionChars); - Self.WeigthKind := AItem.WeigthKind; - Self.InitialPosition := AItem.InitialPosition; - Self.Changed := AItem.Changed; + if (AItem <> nil) then begin + Self.FVirtualPosition := AItem^.VirtualPosition; + Self.Characters := Copy(AItem^.Characters); + //SetLength(Self.Context,Length(AItem^.Context)); + Self.Context := Copy(AItem^.Context); + Self.ExpansionChars := Copy(AItem^.ExpansionChars); + Self.WeigthKind := AItem^.WeigthKind; + Self.InitialPosition := AItem^.InitialPosition; + Self.Changed := AItem^.Changed; + end; end; function TReorderUnit.HasContext() : Boolean; @@ -1365,7 +1906,7 @@ begin Result.Clear(); SetLength(Result.Data,Self.ActualLength); for i := 0 to Length(Result.Data) - 1 do - Result.Data[i].Assign(Self.Data[i]); + Result.Data[i].Assign(@Self.Data[i]); Result.FActualLength := Self.FActualLength; end; @@ -1381,13 +1922,13 @@ begin if (ActualLength=0) then begin EnsureSize(ActualLength + 1); p := @Data[0]; - p^.Assign(AItem); + p^.Assign(@AItem); p^.Changed := True; exit(0); end; k := IndexOf(AItem.Characters,AItem.Context,@Data[0],ActualLength); if (k = ADestPos) then begin - Data[ADestPos].Assign(AItem); + Data[ADestPos].Assign(@AItem); Data[ADestPos].Changed := True; exit(k); end; @@ -1399,7 +1940,7 @@ begin Data[c].Clear(); p := @Data[finalPos]; if (finalPos = ActualLength) then begin - p^.Assign(AItem); + p^.Assign(@AItem); p^.Changed := True; end else begin if (c > 0) then begin @@ -1416,7 +1957,7 @@ begin (ActualLength-(finalPos+1))*SizeOf(TReorderUnit) );} FillChar(Pointer(p)^,SizeOf(TReorderUnit),0); - p^.Assign(AItem); + p^.Assign(@AItem); p^.Changed := True; end; if (k >= 0) then begin @@ -1636,6 +2177,42 @@ begin CompareProps(ASource^.Items, PUCA_PropItemRec(@y[0]),ASource^.ItemSize); end; +function ForEachRule( + ACollationType : TCldrCollationItem; + AVisitFunc : TRuleVisiterFunction; + ACustomData : Pointer +) : Boolean; +var + i : Integer; + locImport : TCldrImport; + locRep : TCldrCollationRepository; + locCollation : TCldrCollation; + locType : TCldrCollationItem; + locRules : TReorderSequenceArray; +begin + Result := False; + if not Assigned(AVisitFunc) then + exit; + if (ACollationType.Imports.Count > 0) then begin + locRep := ACollationType.Parent.Repository; + for i := 0 to ACollationType.Imports.Count-1 do begin + locImport := ACollationType.Imports[i]; + locCollation := locRep.Load(locImport.Source,TCldrParserMode.FullParsing); + locType := locCollation.Find(locImport.TypeName); + if (locType = nil) then + raise ECldrException.CreateFmt(sCollationTypeNotFound,[locImport.TypeName]); + if not ForEachRule(locType,AVisitFunc,ACustomData) then + exit; + end; + end; + locRules := ACollationType.Rules; + for i := Low(locRules) to High(locRules) do begin + if not AVisitFunc(@locRules[i],ACollationType,ACustomData) then + exit; + end; + Result := True; +end; + procedure GenerateCdlrCollation( ACollation : TCldrCollation; AItemName : string; diff --git a/utils/unicode/cldrparser.lpr b/utils/unicode/cldrparser.lpr index a8d98796c1..d4030de3f0 100644 --- a/utils/unicode/cldrparser.lpr +++ b/utils/unicode/cldrparser.lpr @@ -1,5 +1,5 @@ { Unicode CLDR's collation parser. - Copyright (c) 2013 by Inoussa OUEDRAOGO + Copyright (c) 2013-2015 by Inoussa OUEDRAOGO It creates units from CLDR's collation files. @@ -23,16 +23,18 @@ program cldrparser; {$mode objfpc}{$H+} { $define WINCE_TEST} +{$TYPEDADDRESS ON} uses SysUtils, classes, getopts,{$ifdef WINCE}StreamIO,{$endif} - cldrhelper, helper, cldrtest, cldrxml, unicodeset; + cldrhelper, helper, cldrtest, cldrxml, unicodeset, cldrtxt; const + SROOT_RULES_FILE = 'UCA_Rules_SHORT.txt'; SUsageText = 'This program creates pascal units from CLDR''s collation files for usage ' + sLineBreak + 'with the FreePascal Native Unicode Manager.' + sLineBreak + sLineBreak + - 'Usage : cldrparser <collationFileName> [<typeName>] [-d<dataDir>] [-o<outputDir>] [-t]' + sLineBreak + sLineBreak + + 'Usage : cldrparser <collationFileName> [<typeName>] [-a<alt>] [-d<dataDir>] [-o<outputDir>] [-t<HaltOnFail>]' + sLineBreak + sLineBreak + ' where :' + sLineBreak + ' ' + sLineBreak + ' - collationFileName : specify the target file.' + sLineBreak + @@ -43,24 +45,30 @@ const ' * the type named "standard" ' + sLineBreak + ' * the type named "search" ' + sLineBreak + ' * the first type.' + sLineBreak + + ' - a : this provides the "alt" property to select specific "type".' + sLineBreak + ' - dataDir : specify the directory that contains the collation files.' + sLineBreak + ' The default value is the program''s directory.' + sLineBreak + ' - outputDir : specify the directory where the generated files will be stored.' + sLineBreak + ' The default value is the program''s directory.' + sLineBreak + ' - t : to execute parser the test suite. The program will execute only the test suite and exit.' + sLineBreak + + ' <HaltOnFail> may be one of (y, Y, t, T, 1) to halt the execution on the first failing.' + sLineBreak + ' ' + sLineBreak + ' The program expects some files to be present in the <dataDir> folder : ' + sLineBreak + - ' - UCA_Rules_SHORT.xml found in the CollationAuxiliary.zip available on unicode.org' + sLineBreak + - ' - allkeys.txt this is the file allkeys_CLDR.txt contained in CollationAuxiliary.zip renamed to allkeys.txt' + sLineBreak + - ' The CollationAuxiliary.zip archive is provided by unicode in the "unicode collation algorithm data files" section.'; + ' - UCA_Rules_SHORT.xml ' + sLineBreak + + ' - allkeys.txt this is the file allkeys_CLDR.txt renamed to allkeys.txt' + sLineBreak + + ' These files are in the core.zip file of the CLDR release files. The CLDR''version used should be synchronized the' + sLineBreak + + ' version of the Unicode version used, for example for Uniocde 7 it will be CLDR 26.' + sLineBreak + + ' The CLDR files are provided by the Unicode Consortium at http://cldr.unicode.org/index/downloads'; function ParseOptions( var ADataDir, AOuputDir, ACollationFileName, - ACollationTypeName : string; - var AExecTestSuite : Boolean + ACollationTypeName, + ACollationTypeAlt : string; + var AExecTestSuite, + ATestHaltOnFail : Boolean ) : Boolean; var c : Char; @@ -78,8 +86,9 @@ begin Result := True; AExecTestSuite := False; repeat - c := GetOpt('d:o:ht'); + c := GetOpt('a:d:o:ht:'); case c of + 'a' : ACollationTypeAlt := Trim(OptArg); 'd' : ADataDir := ExpandFileName(Trim(OptArg)); 'o' : AOuputDir := ExpandFileName(Trim(OptArg)); 'h', '?' : @@ -87,7 +96,12 @@ begin WriteLn(SUsageText); Result := False; end; - 't' : AExecTestSuite := True; + 't' : + begin + AExecTestSuite := True; + s := Trim(OptArg); + ATestHaltOnFail := (s <> '') and CharInSet(s[1],['y','Y','t','T','1']); + end; end; until (c = EndOfOptions); idx := 0; @@ -109,12 +123,12 @@ var orderedChars : TOrderedCharacters; ucaBook : TUCA_DataBook; stream, streamNE, streamOE, binaryStreamNE, binaryStreamOE : TMemoryStream; - s, collationFileName, collationTypeName : string; + s, collationFileName, collationTypeName, collationTypeAlt : string; i , c: Integer; collation : TCldrCollation; dataPath, outputPath : string; collationItem : TCldrCollationItem; - testSuiteFlag : Boolean; + testSuiteFlag, testSuiteHaltOnFailFlag : Boolean; {$ifdef WINCE} fs : TFileStream; {$endif WINCE} @@ -147,11 +161,20 @@ begin outputPath := ''; collationFileName := ''; collationTypeName := ''; + collationTypeAlt := ''; testSuiteFlag := False; - if not ParseOptions(dataPath,outputPath,collationFileName,collationTypeName,testSuiteFlag) then + testSuiteHaltOnFailFlag := True; + if not ParseOptions( + dataPath,outputPath,collationFileName,collationTypeName, + collationTypeAlt,testSuiteFlag,testSuiteHaltOnFailFlag + ) + then begin + WriteLn(SUsageText); Halt(1); + end; if testSuiteFlag then begin - exec_tests(); + WriteLn('Executing the test suite ...'); + exec_tests(testSuiteHaltOnFailFlag); Halt; end; if (dataPath <> '') and not(DirectoryExists(dataPath)) then begin @@ -173,11 +196,11 @@ begin end; {$endif WINCE_TEST} if not( - FileExists(dataPath+'UCA_Rules_SHORT.xml') and + FileExists(dataPath+SROOT_RULES_FILE) and FileExists(dataPath+'allkeys.txt') ) then begin - WriteLn(Format('File not found : %s or %s.',[dataPath+'UCA_Rules_SHORT.xml',dataPath+'allkeys.txt'])); + WriteLn(Format('File not found : %s or %s.',[dataPath+SROOT_RULES_FILE,dataPath+'allkeys.txt'])); Halt(1); end; @@ -195,22 +218,35 @@ begin binaryStreamOE := nil; collation := TCldrCollation.Create(); try - ParseCollationDocument(collationFileName,collation,TCldrParserMode.HeaderParsing); - WriteLn(Format(' Collation Count = %d',[collation.ItemCount])); - if (collation.ItemCount = 0) then begin + ParseCollationDocument2(collationFileName,collation,TCldrParserMode.HeaderParsing); + WriteLn(Format(' Collation Count = %d',[collation.FindPublicItemCount()])); + if (collation.FindPublicItemCount() = 0) then begin WriteLn('No collation in this file.'); end else begin - for i := 0 to collation.ItemCount - 1 do - WriteLn(Format(' Item[%d] = (Type = %s)',[i, collation.Items[i].TypeName])); - collationItem := collation.Find(collationTypeName); + for i := 0 to collation.ItemCount - 1 do begin + if not collation.Items[i].IsPrivate() then begin + s := collation.Items[i].TypeName; + if (collation.Items[i].Alt <> '') then + s := s + ', Alt = ' + collation.Items[i].Alt; + WriteLn(Format(' Item[%d] = (Type = %s)',[i,s])); + end; + end; + if (collationTypeAlt = '') then + collationItem := collation.Find(collationTypeName) + else + collationItem := collation.Find(collationTypeName,collationTypeAlt); if (collationItem = nil) then begin collationTypeName := FindCollationDefaultItemName(collation); collationItem := collation.Find(collationTypeName); + collationTypeAlt := collationItem.Alt; end; - WriteLn(Format('Parsing Collation Item "%s" ...',[collationTypeName])); - ParseCollationDocument(collationFileName,collationItem,collationTypeName); + s := collationTypeName; + if (collationTypeAlt <> '') then + s := Format('%s (%s)',[s,collationTypeAlt]); + WriteLn(Format('Parsing Collation Item "%s" ...',[s])); + ParseCollationDocument2(collationFileName,collationItem,collationTypeName); - s := dataPath + 'UCA_Rules_SHORT.xml'; + s := dataPath + SROOT_RULES_FILE; WriteLn; WriteLn('Parsing ',QuotedStr(s),' ...'); FillByte(orderedChars,SizeOf(orderedChars),0); @@ -223,6 +259,9 @@ begin s := dataPath + 'allkeys.txt'; stream.LoadFromFile(s); ParseUCAFile(stream,ucaBook); + //WriteLn(' LEVEL-2''s items Value = ',CalcMaxLevel2Value(ucaBook.Lines)); + //RewriteLevel2Values(@ucaBook.Lines[0],Length(ucaBook.Lines)); + //WriteLn(' LEVEL-2''s items Value (after rewrite) = ',CalcMaxLevel2Value(ucaBook.Lines)); c := FillInitialPositions(@orderedChars.Data[0],orderedChars.ActualLength,ucaBook.Lines); if (c > 0) then WriteLn(' Missed Initial Positions = ',c); @@ -240,18 +279,18 @@ begin binaryStreamNE,binaryStreamOE, orderedChars,ucaBook.Lines ); - stream.SaveToFile(ExtractFilePath(collationFileName)+s); + stream.SaveToFile(outputPath+s); if (streamNE.Size > 0) then begin - streamNE.SaveToFile(ExtractFilePath(collationFileName)+GenerateEndianIncludeFileName(s,ENDIAN_NATIVE)); - streamOE.SaveToFile(ExtractFilePath(collationFileName)+GenerateEndianIncludeFileName(s,ENDIAN_NON_NATIVE)); + streamNE.SaveToFile(outputPath+GenerateEndianIncludeFileName(s,ENDIAN_NATIVE)); + streamOE.SaveToFile(outputPath+GenerateEndianIncludeFileName(s,ENDIAN_NON_NATIVE)); end; if (binaryStreamNE.Size > 0) then begin binaryStreamNE.SaveToFile( - ExtractFilePath(collationFileName) + + outputPath + ChangeFileExt(s,Format('_%s.bco',[ENDIAN_SUFFIX[ENDIAN_NATIVE]])) ); binaryStreamOE.SaveToFile( - ExtractFilePath(collationFileName) + + outputPath + ChangeFileExt(s,Format('_%s.bco',[ENDIAN_SUFFIX[ENDIAN_NON_NATIVE]])) ); end; diff --git a/utils/unicode/cldrtest.pas b/utils/unicode/cldrtest.pas index b5b74e545a..6f0abcbbb9 100644 --- a/utils/unicode/cldrtest.pas +++ b/utils/unicode/cldrtest.pas @@ -1,6 +1,6 @@ { CLDR collation Algorithm test routines. - Copyright (c) 2013 by Inoussa OUEDRAOGO + Copyright (c) 2013-2015 by Inoussa OUEDRAOGO The source code is distributed under the Library GNU General Public License with the following modification: @@ -29,7 +29,7 @@ interface uses Classes, SysUtils, - helper, cldrhelper, unicodedata; + unicodeset, helper, cldrhelper, unicodedata, cldrtxt, cldrxml; function ToAnsiChars(const AValue : array of TUnicodeCodePoint) : string; function DumpSequenceAnsi(const ASequence : TOrderedCharacters) : string; @@ -42,7 +42,7 @@ uses function ToWeight(const APrimary, ASecondary, ATertiary : Cardinal) : TUCA_WeightRecArray;overload; function ToWeight(const AWeigths : array of Cardinal) : TUCA_WeightRecArray;overload; - procedure exec_tests(); + procedure exec_tests(const APropagateException : Boolean = True); procedure test1(); procedure test2(); @@ -59,41 +59,363 @@ uses procedure test13(); procedure test14(); procedure test15(); + procedure test16a(); + procedure test16b(); + procedure test16c(); + procedure test16d(); + procedure test16e(); + + procedure test_parser_1(); + procedure test_parser_2(); + procedure test_parser_3(); + procedure test_parser_4(); + procedure test_parser_5(); + procedure test_parser_6(); + procedure test_parser_7(); + procedure test_parser_8(); + procedure test_parser_9(); + procedure test_parser_abreviating_1(); + procedure test_parser_abreviating_2(); + procedure test_parser_abreviating_3(); + procedure test_parser_abreviating_4(); + procedure test_parser_abreviating_5(); + procedure test_parser_abreviating_6(); + procedure test_parser_abreviating_7(); + procedure test_parser_abreviating_8(); + procedure test_parser_abreviating_9(); + procedure test_parser_abreviating_10(); + procedure test_parser_contraction_1(); + procedure test_parser_contraction_2(); + procedure test_parser_expansion_1(); + procedure test_parser_special_char_1(); + procedure test_parser_special_char_2(); + procedure test_parser_special_char_3(); + procedure test_parser_special_char_4(); + procedure test_parser_special_char_5(); + procedure test_parser_special_char_6(); + procedure test_parser_special_char_7(); + procedure test_parser_skip_comment_1(); + procedure test_parser_skip_comment_2(); + procedure test_parser_skip_comment_3(); + procedure test_parser_quoted_string_1(); + procedure test_parser_quoted_string_2(); + procedure test_parser_quoted_string_3(); + procedure test_parser_quoted_string_4(); + procedure test_parser_quoted_string_5(); + procedure test_parser_quoted_string_6(); + procedure test_parser_quoted_string_7(); + procedure test_parser_quoted_string_8(); + procedure test_parser_contexte_before_1(); + procedure test_parser_contexte_before_2(); + procedure test_parser_contexte_before_3(); + procedure test_parser_contexte_before_4(); + procedure test_parser_placement_before_1(); + procedure test_parser_placement_before_2(); + procedure test_parser_placement_before_3(); + procedure test_parser_multi_unit_statement_line_1(); + procedure test_parser_multi_unit_statement_line_2(); + procedure test_parser_multi_unit_statement_line_3(); + procedure test_parser_multi_statement_line_1(); + procedure test_parser_multi_statement_line_2(); + procedure test_parser_multi_statement_line_3(); + procedure test_parser_multi_statement_line_4(); + + procedure test_parser_multi_line_statements_1(); + + procedure test_collation_parser_HeaderParsing(); + procedure test_collation_parser_HeaderParsing_2(); + procedure test_collation_parser_FullParsing(); + procedure test_collation_parser_FullParsing_2(); + procedure test_collation_parser_complete_rules(); + procedure test_collation_parser_complete_rules_2(); + + procedure test_unicode_set_1(); + procedure test_unicode_set_2(); + procedure test_unicode_set_3(); implementation +uses + typinfo; + +procedure do_exec_test(ATest : TProcedure; const APropagateException : Boolean); +begin + if APropagateException then begin + ATest(); + end else begin + try + ATest(); + except + on e : Exception do begin + writeln(e.Message); + end; + end; + end; +end; -procedure exec_tests(); +procedure exec_utils_tests(const APropagateException : Boolean); begin + WriteLn;WriteLn;WriteLn;WriteLn; + WriteLn('UTILITIES TESTS - START'); + WriteLn('***************************** TEST UNICODESET 1 ******************'); + do_exec_test(@test_unicode_set_1,APropagateException); + WriteLn; + WriteLn('***************************** TEST UNICODESET 2 ******************'); + do_exec_test(@test_unicode_set_2,APropagateException); + WriteLn; + WriteLn('***************************** TEST UNICODESET 3 ******************'); + do_exec_test(@test_unicode_set_3,APropagateException); + WriteLn; + WriteLn('UTILITIES TESTS - START'); + WriteLn; + WriteLn; +end; + +procedure exec_parser_tests(const APropagateException : Boolean); +begin + WriteLn;WriteLn;WriteLn;WriteLn; + WriteLn('PARSER TESTS'); + WriteLn('***************************** TEST PARSER 1 ******************'); + do_exec_test(@test_parser_1,APropagateException); + WriteLn; + WriteLn('***************************** TEST PARSER 2 ******************'); + do_exec_test(@test_parser_2,APropagateException); + WriteLn; + WriteLn('***************************** TEST PARSER 3 ******************'); + do_exec_test(@test_parser_3,APropagateException); + WriteLn; + WriteLn('***************************** TEST PARSER 4 ******************'); + do_exec_test(@test_parser_4,APropagateException); + WriteLn; + WriteLn('***************************** TEST PARSER 5 ******************'); + do_exec_test(@test_parser_5,APropagateException); + WriteLn; + WriteLn('***************************** TEST PARSER 6 ******************'); + do_exec_test(@test_parser_6,APropagateException); + WriteLn; + WriteLn('***************************** TEST PARSER 7 ******************'); + do_exec_test(@test_parser_7,APropagateException); + WriteLn; + WriteLn('***************************** TEST PARSER 8 ******************'); + do_exec_test(@test_parser_7,APropagateException); + WriteLn; + WriteLn('***************************** TEST PARSER 9 ******************'); + do_exec_test(@test_parser_9,APropagateException); + WriteLn; + WriteLn; + WriteLn('***************************** TEST ABREVIATING 1 ******************'); + do_exec_test(@test_parser_abreviating_1,APropagateException); + WriteLn; + WriteLn('***************************** TEST ABREVIATING 2 ******************'); + do_exec_test(@test_parser_abreviating_2,APropagateException); + WriteLn; + WriteLn('***************************** TEST ABREVIATING 3 ******************'); + do_exec_test(@test_parser_abreviating_3,APropagateException); + WriteLn; + WriteLn('***************************** TEST ABREVIATING 4 ******************'); + do_exec_test(@test_parser_abreviating_4,APropagateException); + WriteLn; + WriteLn('***************************** TEST ABREVIATING 5 ******************'); + do_exec_test(@test_parser_abreviating_5,APropagateException); + WriteLn; + WriteLn('***************************** TEST ABREVIATING 6 ******************'); + do_exec_test(@test_parser_abreviating_6,APropagateException); + WriteLn; + WriteLn('***************************** TEST ABREVIATING 7 ******************'); + do_exec_test(@test_parser_abreviating_7,APropagateException); + WriteLn; + WriteLn('***************************** TEST ABREVIATING 8 ******************'); + do_exec_test(@test_parser_abreviating_8,APropagateException); + WriteLn; + WriteLn('***************************** TEST ABREVIATING 9 ******************'); + do_exec_test(@test_parser_abreviating_9,APropagateException); + WriteLn; + WriteLn; + WriteLn('***************************** TEST ABREVIATING 10 ******************'); + do_exec_test(@test_parser_abreviating_10,APropagateException); + WriteLn; + WriteLn('***************************** TEST CONTRACTION 1 ******************'); + do_exec_test(@test_parser_contraction_1,APropagateException); + WriteLn; + WriteLn('***************************** TEST CONTRACTION 2 ******************'); + do_exec_test(@test_parser_contraction_2,APropagateException); + WriteLn; + WriteLn('***************************** TEST EXPANSION 1 ******************'); + do_exec_test(@test_parser_expansion_1,APropagateException); + WriteLn; + WriteLn('***************************** TEST SPECIAL CHAR 1 ******************'); + do_exec_test(@test_parser_special_char_1,APropagateException); + WriteLn; + WriteLn('***************************** TEST SPECIAL CHAR 2 ******************'); + do_exec_test(@test_parser_special_char_2,APropagateException); + WriteLn; + WriteLn('***************************** TEST SPECIAL CHAR 3 ******************'); + do_exec_test(@test_parser_special_char_3,APropagateException); + WriteLn; + WriteLn('***************************** TEST SPECIAL CHAR 4 ******************'); + do_exec_test(@test_parser_special_char_4,APropagateException); + WriteLn; + WriteLn('***************************** TEST SPECIAL CHAR 5 ******************'); + do_exec_test(@test_parser_special_char_5,APropagateException); + WriteLn; + WriteLn('***************************** TEST SPECIAL CHAR 6 ******************'); + do_exec_test(@test_parser_special_char_6,APropagateException); + WriteLn; + WriteLn('***************************** TEST SPECIAL CHAR 7 ******************'); + do_exec_test(@test_parser_special_char_7,APropagateException); + WriteLn; + WriteLn('***************************** TEST SKIP COMMENT 1 ******************'); + do_exec_test(@test_parser_skip_comment_1,APropagateException); + WriteLn; + WriteLn('***************************** TEST SKIP COMMENT 2 ******************'); + do_exec_test(@test_parser_skip_comment_2,APropagateException); + WriteLn; + WriteLn('***************************** TEST SKIP COMMENT 3 ******************'); + do_exec_test(@test_parser_skip_comment_3,APropagateException); + WriteLn; + WriteLn('***************************** TEST QUOTED STRING 1 ******************'); + do_exec_test(@test_parser_quoted_string_1,APropagateException); + WriteLn; + WriteLn('***************************** TEST QUOTED STRING 2 ******************'); + do_exec_test(@test_parser_quoted_string_2,APropagateException); + WriteLn; + WriteLn('***************************** TEST QUOTED STRING 3 ******************'); + do_exec_test(@test_parser_quoted_string_3,APropagateException); + WriteLn; + WriteLn('***************************** TEST QUOTED STRING 4 ******************'); + do_exec_test(@test_parser_quoted_string_4,APropagateException); + WriteLn; + WriteLn('***************************** TEST QUOTED STRING 5 ******************'); + do_exec_test(@test_parser_quoted_string_5,APropagateException); + WriteLn; + WriteLn('***************************** TEST QUOTED STRING 6 ******************'); + do_exec_test(@test_parser_quoted_string_6,APropagateException); + WriteLn; + WriteLn('***************************** TEST QUOTED STRING 7 ******************'); + do_exec_test(@test_parser_quoted_string_7,APropagateException); + WriteLn; + WriteLn('***************************** TEST QUOTED STRING 8 ******************'); + do_exec_test(@test_parser_quoted_string_8,APropagateException); + WriteLn; + WriteLn('***************************** TEST CONTEXT BEFORE 1 ******************'); + do_exec_test(@test_parser_contexte_before_1,APropagateException); + WriteLn; + WriteLn('***************************** TEST CONTEXT BEFORE 2 ******************'); + do_exec_test(@test_parser_contexte_before_2,APropagateException); + WriteLn; + WriteLn('***************************** TEST CONTEXT BEFORE 3 ******************'); + do_exec_test(@test_parser_contexte_before_3,APropagateException); + WriteLn; + WriteLn('***************************** TEST CONTEXT BEFORE 4 ******************'); + do_exec_test(@test_parser_contexte_before_4,APropagateException); + WriteLn; + WriteLn('***************************** TEST PLACEMENT BEFORE 1 ******************'); + do_exec_test(@test_parser_placement_before_1,APropagateException); + WriteLn; + WriteLn('***************************** TEST PLACEMENT BEFORE 2 ******************'); + do_exec_test(@test_parser_placement_before_2,APropagateException); + WriteLn; + WriteLn('***************************** TEST PLACEMENT BEFORE 3 ******************'); + do_exec_test(@test_parser_placement_before_3,APropagateException); + WriteLn; + WriteLn('***************************** TEST MULTI UNIT STATEMENT LINE 1 ******************'); + do_exec_test(@test_parser_multi_unit_statement_line_1,APropagateException); + WriteLn; + WriteLn('***************************** TEST MULTI UNIT STATEMENT LINE 2 ******************'); + do_exec_test(@test_parser_multi_unit_statement_line_2,APropagateException); + WriteLn; + WriteLn('***************************** TEST MULTI UNIT STATEMENT LINE 3 ******************'); + do_exec_test(@test_parser_multi_unit_statement_line_3,APropagateException); + WriteLn; + WriteLn('***************************** TEST MULTI STATEMENT LINE 1 ******************'); + do_exec_test(@test_parser_multi_statement_line_1,APropagateException); + WriteLn; + WriteLn('***************************** TEST MULTI STATEMENT LINE 2 ******************'); + do_exec_test(@test_parser_multi_statement_line_2,APropagateException); + WriteLn; + WriteLn('***************************** TEST MULTI STATEMENT LINE 3 ******************'); + do_exec_test(@test_parser_multi_statement_line_3,APropagateException); + WriteLn; + WriteLn('***************************** TEST MULTI STATEMENT LINE 4 ******************'); + do_exec_test(@test_parser_multi_statement_line_4,APropagateException); + WriteLn; + WriteLn('***************************** TEST MULTI LINE STATEMENTS 1 ******************'); + do_exec_test(@test_parser_multi_line_statements_1,APropagateException); + WriteLn; + WriteLn; + WriteLn('***************************** TEST REPOSITORY 1 ******************'); + do_exec_test(@test_collation_parser_HeaderParsing,APropagateException); + WriteLn; + WriteLn('***************************** TEST REPOSITORY 2 ******************'); + do_exec_test(@test_collation_parser_FullParsing,APropagateException); + WriteLn; + WriteLn('***************************** TEST REPOSITORY 3 ******************'); + do_exec_test(@test_collation_parser_complete_rules,APropagateException); + WriteLn; + WriteLn('***************************** TEST REPOSITORY 4 ******************'); + do_exec_test(@test_collation_parser_HeaderParsing_2,APropagateException); + WriteLn; + WriteLn('***************************** TEST REPOSITORY 5 ******************'); + do_exec_test(@test_collation_parser_FullParsing_2,APropagateException); + WriteLn; + WriteLn('***************************** TEST REPOSITORY 6 ******************'); + do_exec_test(@test_collation_parser_complete_rules_2,APropagateException); + WriteLn; + + WriteLn; + WriteLn; +end; + +procedure exec_tests(const APropagateException : Boolean); +begin + exec_utils_tests(APropagateException); + + exec_parser_tests(APropagateException); + + WriteLn('END PARSER TESTS'); + WriteLn('*******************************************************'); + WriteLn('***************************** TEST 1 ******************'); - test1(); + do_exec_test(@test1,APropagateException); WriteLn('***************************** TEST 2 ******************'); - test2(); + do_exec_test(@test2,APropagateException); WriteLn('***************************** TEST 3 ******************'); - test3(); + do_exec_test(@test3,APropagateException); WriteLn('***************************** TEST 4 ******************'); - test4(); + do_exec_test(@test4,APropagateException); WriteLn('***************************** TEST 5 ******************'); - test5(); + do_exec_test(@test5,APropagateException); WriteLn('***************************** TEST 6 ******************'); - test6(); + do_exec_test(@test6,APropagateException); WriteLn('***************************** TEST 7 ******************'); - test7(); + do_exec_test(@test7,APropagateException); WriteLn('***************************** TEST 8 ******************'); - test8(); + do_exec_test(@test8,APropagateException); WriteLn('***************************** TEST 9 ******************'); - test9(); + do_exec_test(@test9,APropagateException); WriteLn('***************************** TEST 10 ******************'); - test10(); + do_exec_test(@test10,APropagateException); WriteLn('***************************** TEST 11 ******************'); - test11(); + do_exec_test(@test11,APropagateException); WriteLn('***************************** TEST 12 ******************'); - test12(); + do_exec_test(@test12,APropagateException); WriteLn('***************************** TEST 13 ******************'); - test13(); + do_exec_test(@test13,APropagateException); WriteLn('***************************** TEST 14 ******************'); - test14(); + do_exec_test(@test14,APropagateException); WriteLn('***************************** TEST 15 ******************'); - test15(); + do_exec_test(@test15,APropagateException); + WriteLn('***************************** TEST 16 A ******************'); + do_exec_test(@test16a,APropagateException); + WriteLn('***************************** TEST 16 B ******************'); + do_exec_test(@test16b,APropagateException); + WriteLn('***************************** TEST 16 C ******************'); + do_exec_test(@test16c,APropagateException); + WriteLn('***************************** TEST 16 D ******************'); + do_exec_test(@test16d,APropagateException); + WriteLn('***************************** TEST 16 E ******************'); + do_exec_test(@test16e,APropagateException); + + WriteLn('**** END TESTS'); end; function ToAnsiChars(const AValue : array of TUnicodeCodePoint) : string; @@ -140,7 +462,10 @@ begin Inc(p); for i := i to ASequence.ActualLength - 1 do begin //WriteStr(s,s,AnsiChar(p^.Characters[0]),' <',(1+Ord(p^.WeigthKind)),' '); - WriteStr(s,s,'<',(1+Ord(p^.WeigthKind)),' ',ToAnsiChars(p^.Characters),' '); + if p^.IsVirtual() then + WriteStr(s,s,' [',GetEnumName(TypeInfo(TReorderLogicalReset),Ord(p^.VirtualPosition)),'] ') + else + WriteStr(s,s,'<',(1+Ord(p^.WeigthKind)),' ',ToAnsiChars(p^.Characters),' '); Inc(p); end; end; @@ -478,7 +803,7 @@ begin SetLength(statement.Reset,1); statement.Reset[0] := Ord('a'); - SetLength(statement.Elements,1); + statement.SetElementCount(1); statement.Elements[0] := TReorderUnit.From(Ord('g'),TReorderWeigthKind.Primary,0); sequence.ApplyStatement(@statement); WriteLn('Statement #1 = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak); @@ -491,7 +816,7 @@ begin SetLength(statement.Reset,1); statement.Reset[0] := Ord('a'); - SetLength(statement.Elements,2); + statement.SetElementCount(2); statement.Elements[0] := TReorderUnit.From(Ord('h'),TReorderWeigthKind.Primary,0); statement.Elements[1] := TReorderUnit.From(Ord('k'),TReorderWeigthKind.Primary,0); sequence.ApplyStatement(@statement); @@ -504,7 +829,7 @@ begin SetLength(statement.Reset,1); statement.Reset[0] := Ord('h'); - SetLength(statement.Elements,1); + statement.SetElementCount(1); statement.Elements[0] := TReorderUnit.From(Ord('g'),TReorderWeigthKind.Secondary,0); sequence.ApplyStatement(@statement); WriteLn('Statement #3 = ',sLineBreak,' ',DumpSequenceAnsi(sequence)); @@ -3264,4 +3589,1916 @@ begin end; +procedure test16_prepareWeigth(var AData : TUCA_LineRecArray); +var + p : PUCA_LineRec; +begin + SetLength(AData,3); + p := @AData[Low(AData)]; + p^.CodePoints := CodePointToArray(Ord('a')); + p^.Weights := ToWeight($15EF,$0020,$0002); + Inc(p); + p^.CodePoints := CodePointToArray(Ord('b')); + p^.Weights := ToWeight($1605,$0020,$0002); + Inc(p); + p^.CodePoints := CodePointToArray(Ord('c')); + p^.Weights := ToWeight($161D,$0020,$0002); +end; + +procedure test16a(); +var + sequence : TOrderedCharacters; + statement : TReorderSequence; + wfirst, wresult : TUCA_LineRecArray; + i : Integer; + unicodeBook1, unicodeBook2 : unicodedata.TUCA_DataBook; +begin + statement.Clear(); + test16_prepareWeigth(wfirst); + sequence := TOrderedCharacters.Create(); + sequence.Append(TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,1)); + sequence.Append(TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,2)); + sequence.Append(TReorderUnit.From(TReorderLogicalReset.LastRegular)); + sequence.Append(TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,3)); + for i := 0 to sequence.ActualLength - 1 do + sequence.Data[i].Changed := False; + WriteLn('Initial = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak); + WriteLn(DumpLines(wfirst),sLineBreak+sLineBreak); + ConstructUnicodeBook(wfirst,'test1','first',nil,unicodeBook1); + CheckInf(['a','b','c'],@unicodeBook1); + + // --- test 1 + SetLength(statement.Reset,1); + statement.LogicalPosition := TReorderLogicalReset.LastRegular; + SetLength(statement.Elements,1); + statement.Elements[0] := TReorderUnit.From(Ord('x'),TReorderWeigthKind.Primary,0); + sequence.ApplyStatement(@statement); + WriteLn('Statement #1 = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak); + wresult := nil; + ComputeWeigths(@sequence.Data[0],sequence.ActualLength,wfirst,wresult); + WriteLn(DumpLines(wresult),sLineBreak+sLineBreak); + ConstructUnicodeBook(wresult,'test1','1',@unicodeBook1,unicodeBook2); + CheckInf(['a','b','x'{*},'c'],@unicodeBook2); + WriteLn(' -- test 1 - ok'); +end; + +procedure test16b(); +var + sequence : TOrderedCharacters; + statement : TReorderSequence; + wfirst, wresult : TUCA_LineRecArray; + i : Integer; + unicodeBook1, unicodeBook2 : unicodedata.TUCA_DataBook; +begin + statement.Clear(); + test16_prepareWeigth(wfirst); + sequence := TOrderedCharacters.Create(); + sequence.Append(TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,1)); + sequence.Append(TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,2)); + sequence.Append(TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,3)); + for i := 0 to sequence.ActualLength - 1 do + sequence.Data[i].Changed := False; + WriteLn('Initial = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak); + WriteLn(DumpLines(wfirst),sLineBreak+sLineBreak); + ConstructUnicodeBook(wfirst,'test1','first',nil,unicodeBook1); + CheckInf(['a','b','c'],@unicodeBook1); + + // --- test 1 + SetLength(statement.Reset,1); + statement.LogicalPosition := TReorderLogicalReset.LastRegular; + SetLength(statement.Elements,1); + statement.Elements[0] := TReorderUnit.From(Ord('x'),TReorderWeigthKind.Primary,0); + sequence.ApplyStatement(@statement); + WriteLn('Statement #1 = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak); + wresult := nil; + ComputeWeigths(@sequence.Data[0],sequence.ActualLength,wfirst,wresult); + WriteLn(DumpLines(wresult),sLineBreak+sLineBreak); + ConstructUnicodeBook(wresult,'test1','1',@unicodeBook1,unicodeBook2); + CheckInf(['a','b','c','x'{*}],@unicodeBook2); + WriteLn(' -- test 1 - ok'); + writeln; + writeln; + + // test 2 + statement.Clear(); + SetLength(statement.Reset,1); + statement.Reset[0] := Ord('x'); + SetLength(statement.Elements,1); + statement.Elements[0] := TReorderUnit.From(Ord('y'),TReorderWeigthKind.Primary,0); + sequence.ApplyStatement(@statement); + WriteLn('Statement #2 = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak); + wresult := nil; + ComputeWeigths(@sequence.Data[0],sequence.ActualLength,wfirst,wresult); + WriteLn(DumpLines(wresult),sLineBreak+sLineBreak); + ConstructUnicodeBook(wresult,'test2','2',@unicodeBook1,unicodeBook2); + CheckInf(['a','b','c','x'{*},'y'{*}],@unicodeBook2); + WriteLn(' -- test 2 - ok'); +end; + +procedure test16c(); +var + sequence : TOrderedCharacters; + statement : TReorderSequence; + wfirst, wresult : TUCA_LineRecArray; + i : Integer; + unicodeBook1, unicodeBook2 : unicodedata.TUCA_DataBook; +begin + statement.Clear(); + test16_prepareWeigth(wfirst); + sequence := TOrderedCharacters.Create(); + sequence.Append(TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,1)); + sequence.Append(TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,2)); + sequence.Append(TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,3)); + for i := 0 to sequence.ActualLength - 1 do + sequence.Data[i].Changed := False; + WriteLn('Initial = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak); + WriteLn(DumpLines(wfirst),sLineBreak+sLineBreak); + ConstructUnicodeBook(wfirst,'test1','first',nil,unicodeBook1); + CheckInf(['a','b','c'],@unicodeBook1); + + // --- test 1 + SetLength(statement.Reset,1); + statement.LogicalPosition := TReorderLogicalReset.LastRegular; + SetLength(statement.Elements,1); + statement.Elements[0] := TReorderUnit.From(Ord('x'),TReorderWeigthKind.Secondary,0); + sequence.ApplyStatement(@statement); + WriteLn('Statement #1 = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak); + wresult := nil; + ComputeWeigths(@sequence.Data[0],sequence.ActualLength,wfirst,wresult); + WriteLn(DumpLines(wresult),sLineBreak+sLineBreak); + ConstructUnicodeBook(wresult,'test1','1',@unicodeBook1,unicodeBook2); + CheckInf(['a','b','c','x'{*}],@unicodeBook2); + WriteLn(' -- test 1 - ok'); + writeln; + writeln; + + // test 2 + statement.Clear(); + SetLength(statement.Reset,1); + statement.Reset[0] := Ord('x'); + SetLength(statement.Elements,1); + statement.Elements[0] := TReorderUnit.From(Ord('y'),TReorderWeigthKind.Primary,0); + sequence.ApplyStatement(@statement); + WriteLn('Statement #2 = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak); + wresult := nil; + ComputeWeigths(@sequence.Data[0],sequence.ActualLength,wfirst,wresult); + WriteLn(DumpLines(wresult),sLineBreak+sLineBreak); + ConstructUnicodeBook(wresult,'test2','2',@unicodeBook1,unicodeBook2); + CheckInf(['a','b','c','x'{*},'y'{*}],@unicodeBook2); + WriteLn(' -- test 2 - ok'); +end; + +procedure test16d(); +var + sequence : TOrderedCharacters; + statement : TReorderSequence; + wfirst, wresult : TUCA_LineRecArray; + i : Integer; + unicodeBook1, unicodeBook2 : unicodedata.TUCA_DataBook; +begin + statement.Clear(); + test16_prepareWeigth(wfirst); + sequence := TOrderedCharacters.Create(); + sequence.Append(TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,1)); + sequence.Append(TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,2)); + sequence.Append(TReorderUnit.From(TReorderLogicalReset.LastRegular)); + sequence.Append(TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,3)); + for i := 0 to sequence.ActualLength - 1 do + sequence.Data[i].Changed := False; + WriteLn('Initial = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak); + WriteLn(DumpLines(wfirst),sLineBreak+sLineBreak); + ConstructUnicodeBook(wfirst,'test1','first',nil,unicodeBook1); + CheckInf(['a','b','c'],@unicodeBook1); + + // --- test 1 + SetLength(statement.Reset,1); + statement.LogicalPosition := TReorderLogicalReset.LastRegular; + statement.Before := True; + SetLength(statement.Elements,1); + statement.Elements[0] := TReorderUnit.From(Ord('x'),TReorderWeigthKind.Primary,0); + sequence.ApplyStatement(@statement); + WriteLn('Statement #1 = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak); + wresult := nil; + ComputeWeigths(@sequence.Data[0],sequence.ActualLength,wfirst,wresult); + WriteLn(DumpLines(wresult),sLineBreak+sLineBreak); + ConstructUnicodeBook(wresult,'test1','1',@unicodeBook1,unicodeBook2); + CheckInf(['a','x'{*},'b','c'],@unicodeBook2); + WriteLn(' -- test 1 - ok'); +end; + +procedure test16e_prepareWeigth(var AData : TUCA_LineRecArray); +var + p : PUCA_LineRec; +begin + SetLength(AData,5); + p := @AData[Low(AData)]; + p^.CodePoints := CodePointToArray(Ord('a')); + p^.Weights := ToWeight(1,10,10); + Inc(p); + p^.CodePoints := CodePointToArray(Ord('b')); + p^.Weights := ToWeight(2,10,10); + Inc(p); + p^.CodePoints := CodePointToArray([Ord('b'),Ord('2')]); + p^.Weights := ToWeight(2,20,10); + Inc(p); + p^.CodePoints := CodePointToArray([Ord('b'),Ord('3')]); + p^.Weights := ToWeight(2,20,20); + Inc(p); + p^.CodePoints := CodePointToArray(Ord('c')); + p^.Weights := ToWeight(3,10,10); +end; + +procedure test16e(); +var + sequence : TOrderedCharacters; + statement : TReorderSequence; + wfirst, wresult : TUCA_LineRecArray; + i : Integer; + unicodeBook1, unicodeBook2 : unicodedata.TUCA_DataBook; +begin + statement.Clear(); + test16e_prepareWeigth(wfirst); + sequence := TOrderedCharacters.Create(); + sequence.Append(TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,1)); + sequence.Append(TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,2)); + sequence.Append(TReorderUnit.From([Ord('b'),Ord('2')],TReorderWeigthKind.Secondary,3)); + sequence.Append(TReorderUnit.From([Ord('b'),Ord('3')],TReorderWeigthKind.Tertiary,4)); + sequence.Append(TReorderUnit.From(TReorderLogicalReset.LastRegular)); + sequence.Append(TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,5)); + for i := 0 to sequence.ActualLength - 1 do + sequence.Data[i].Changed := False; + WriteLn('Initial = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak); + WriteLn(DumpLines(wfirst),sLineBreak+sLineBreak); + ConstructUnicodeBook(wfirst,'test1','first',nil,unicodeBook1); + CheckInf(['a','b','b2','b3','c'],@unicodeBook1); + + // --- test 1 + SetLength(statement.Reset,1); + statement.LogicalPosition := TReorderLogicalReset.LastRegular; + statement.Before := True; + SetLength(statement.Elements,1); + statement.Elements[0] := TReorderUnit.From(Ord('x'),TReorderWeigthKind.Secondary,0); + sequence.ApplyStatement(@statement); + WriteLn('Statement #1 = ',sLineBreak,' ',DumpSequenceAnsi(sequence),sLineBreak); + wresult := nil; + ComputeWeigths(@sequence.Data[0],sequence.ActualLength,wfirst,wresult); + WriteLn(DumpLines(wresult),sLineBreak+sLineBreak); + ConstructUnicodeBook(wresult,'test1','1',@unicodeBook1,unicodeBook2); + CheckInf(['a','b','x'{*},'b2','b3','c'],@unicodeBook2); + WriteLn(' -- test 1 - ok'); +end; + +procedure CheckEqual(A,B : array of TUnicodeCodePoint; const AMsg : string);overload; +var + i : Integer; +begin + Check((Length(A)=Length(B)),'Length() <>'); + if (Length(A) > 0) then begin + for i := Low(A) to High(A) do + Check(A[i] = B[i],'%s, A[%d] <>',[AMsg,i]); + end; +end; + +procedure CheckEqual(A,B : TReorderUnit; const AMsg : string);overload; +var + i : Integer; +begin + Check((A.VirtualPosition=B.VirtualPosition),'VirtualPosition <>'); + Check((A.InitialPosition=B.InitialPosition),'InitialPosition <>'); + Check((A.Changed=B.Changed),'Changed <>'); + Check((A.WeigthKind=B.WeigthKind),'WeigthKind <>'); + CheckEqual(A.Context,B.Context,'Context'); + CheckEqual(A.ExpansionChars,B.ExpansionChars,'ExpansionChars'); + CheckEqual(A.Characters,B.Characters,'Characters'); + CheckEqual(A.Context,B.Context,'Context'); +end; + +procedure CheckEqual(A,B : TReorderSequence);overload; +var + i : Integer; +begin + Check((A.LogicalPosition=B.LogicalPosition),'LogicalPosition <>'); + Check((A.Before=B.Before),'Before <>'); + CheckEqual(A.Reset,B.Reset,'Reset'); + Check((Length(A.Elements)=Length(B.Elements)),'Length(Elements) <>'); + for i := Low(A.Elements) to High(A.Elements) do + CheckEqual(A.Elements[i],B.Elements[i],Format('Elements[%d]',[i])); +end; + +function CountLines(const AStr : ansistring) : Integer; +var + c, i : Integer; +begin + c := 0; + for i := 1 to Length(AStr) do begin + if (AStr[i] = #10) then + c := c+1; + end; + if (c = 0) and (AStr <> '') then + c := c+1; + Result := c; +end; + +procedure do_test_parser( + AText : ansistring; + const AExpected : TReorderSequence; + const ALineCount : Integer +);overload; +var + locText : UTF8String; + locTextPointer : PAnsiChar; + locStartPosition, + locMaxLen : Integer; + locStatement : TReorderSequence; + locNextPos, + locLineCount : Integer; +begin + locText := AText; + WriteLn('Parsing "',locText,'" ...'); + locTextPointer := @locText[1]; + locMaxLen := Length(locText); + locStartPosition := 0; + locNextPos := 0; + locLineCount := 0; + locStatement.Clear(); + Check( + ParseStatement( + locTextPointer,locStartPosition,locMaxLen,@locStatement,locNextPos,locLineCount + ), + 'Fail to Parse : "%s".', [locText] + ); + if (locLineCount > 1) then + WriteLn; + WriteLn(' Next Position : ',locNextPos); + WriteLn(' Line Count : ',locLineCount); + if (CountLines(locText) = 1) then + Check((locNextPos>=locMaxLen),'Next Position'); + if (ALineCount > 0) then + Check((locLineCount=ALineCount),'Line Count'); + CheckEqual(locStatement,AExpected); + + WriteLn(' -- test ok'); +end; + +procedure do_test_parser(AText : ansistring; const AExpected : TReorderSequence);inline;overload; +begin + do_test_parser(AText,AExpected,1); +end; + +procedure test_parser_1(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + locStatement.LogicalPosition := TReorderLogicalReset.LastTertiaryIgnorable; + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Identity,0); + do_test_parser('& [last tertiary ignorable] = a',locStatement); +end; + +procedure test_parser_2(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + locStatement.LogicalPosition := TReorderLogicalReset.LastTertiaryIgnorable; + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,0); + do_test_parser('& [last tertiary ignorable] < b',locStatement); +end; + +procedure test_parser_3(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + locStatement.LogicalPosition := TReorderLogicalReset.LastTertiaryIgnorable; + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Secondary,0); + do_test_parser('& [last tertiary ignorable] << c',locStatement); +end; + +procedure test_parser_4(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + locStatement.LogicalPosition := TReorderLogicalReset.LastTertiaryIgnorable; + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From(Ord('d'),TReorderWeigthKind.Tertiary,0); + do_test_parser('& [last tertiary ignorable] <<< d',locStatement); +end; + +procedure test_parser_5(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + locStatement.LogicalPosition := TReorderLogicalReset.LastTertiaryIgnorable; + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From(1,TReorderWeigthKind.Primary,0); + do_test_parser('& [last tertiary ignorable] < ''\u0001''',locStatement); +end; + +procedure test_parser_6(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + locStatement.LogicalPosition := TReorderLogicalReset.LastTertiaryIgnorable; + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From(7,TReorderWeigthKind.Secondary,0); + do_test_parser('& [last tertiary ignorable] << ''\u0007''',locStatement); +end; + +procedure test_parser_7(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + locStatement.LogicalPosition := TReorderLogicalReset.LastTertiaryIgnorable; + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From(9,TReorderWeigthKind.Secondary,0); + do_test_parser('& [last tertiary ignorable] << ''\u0009''',locStatement); +end; + +procedure test_parser_8(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + locStatement.LogicalPosition := TReorderLogicalReset.LastTertiaryIgnorable; + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From($000110BD,TReorderWeigthKind.Primary,0); + do_test_parser('& [last tertiary ignorable] < ''\U000110BD''',locStatement); +end; + +procedure test_parser_9(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0); + do_test_parser('&x < a',locStatement); +end; + +procedure test_parser_abreviating_1(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(3); + locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0); + locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,0); + locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,0); + do_test_parser('&x <* abc',locStatement); +end; + +procedure test_parser_abreviating_2(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(7); + locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0); + locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,0); + locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,0); + locStatement.Elements[3] := TReorderUnit.From(Ord('d'),TReorderWeigthKind.Primary,0); + locStatement.Elements[4] := TReorderUnit.From(Ord('e'),TReorderWeigthKind.Primary,0); + locStatement.Elements[5] := TReorderUnit.From(Ord('f'),TReorderWeigthKind.Primary,0); + locStatement.Elements[6] := TReorderUnit.From(Ord('g'),TReorderWeigthKind.Primary,0); + do_test_parser('&x <* abcd-g',locStatement); +end; + +procedure test_parser_abreviating_3(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(8); + locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0); + locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,0); + locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,0); + locStatement.Elements[3] := TReorderUnit.From(Ord('d'),TReorderWeigthKind.Primary,0); + locStatement.Elements[4] := TReorderUnit.From(Ord('e'),TReorderWeigthKind.Primary,0); + locStatement.Elements[5] := TReorderUnit.From(Ord('f'),TReorderWeigthKind.Primary,0); + locStatement.Elements[6] := TReorderUnit.From(Ord('g'),TReorderWeigthKind.Primary,0); + locStatement.Elements[7] := TReorderUnit.From(Ord('p'),TReorderWeigthKind.Primary,0); + do_test_parser('&x <* abcd-gp',locStatement); +end; + +procedure test_parser_abreviating_4(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(11); + locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0); + locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,0); + locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,0); + locStatement.Elements[3] := TReorderUnit.From(Ord('d'),TReorderWeigthKind.Primary,0); + locStatement.Elements[4] := TReorderUnit.From(Ord('e'),TReorderWeigthKind.Primary,0); + locStatement.Elements[5] := TReorderUnit.From(Ord('f'),TReorderWeigthKind.Primary,0); + locStatement.Elements[6] := TReorderUnit.From(Ord('g'),TReorderWeigthKind.Primary,0); + locStatement.Elements[7] := TReorderUnit.From(Ord('p'),TReorderWeigthKind.Primary,0); + locStatement.Elements[8] := TReorderUnit.From(Ord('q'),TReorderWeigthKind.Primary,0); + locStatement.Elements[9] := TReorderUnit.From(Ord('r'),TReorderWeigthKind.Primary,0); + locStatement.Elements[10] := TReorderUnit.From(Ord('s'),TReorderWeigthKind.Primary,0); + do_test_parser('&x <* abcd-gp-s',locStatement); +end; + +procedure test_parser_abreviating_5(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(3); + locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Secondary,0); + locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Secondary,0); + locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Secondary,0); + do_test_parser('&x <<* abc',locStatement); +end; + +procedure test_parser_abreviating_6(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(11); + locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Secondary,0); + locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Secondary,0); + locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Secondary,0); + locStatement.Elements[3] := TReorderUnit.From(Ord('d'),TReorderWeigthKind.Secondary,0); + locStatement.Elements[4] := TReorderUnit.From(Ord('e'),TReorderWeigthKind.Secondary,0); + locStatement.Elements[5] := TReorderUnit.From(Ord('f'),TReorderWeigthKind.Secondary,0); + locStatement.Elements[6] := TReorderUnit.From(Ord('g'),TReorderWeigthKind.Secondary,0); + locStatement.Elements[7] := TReorderUnit.From(Ord('p'),TReorderWeigthKind.Secondary,0); + locStatement.Elements[8] := TReorderUnit.From(Ord('q'),TReorderWeigthKind.Secondary,0); + locStatement.Elements[9] := TReorderUnit.From(Ord('r'),TReorderWeigthKind.Secondary,0); + locStatement.Elements[10] := TReorderUnit.From(Ord('s'),TReorderWeigthKind.Secondary,0); + do_test_parser('&x <<* abcd-gp-s',locStatement); +end; + +procedure test_parser_abreviating_7(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(3); + locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Tertiary,0); + locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Tertiary,0); + locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Tertiary,0); + do_test_parser('&x <<<* abc',locStatement); +end; + +procedure test_parser_abreviating_8(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(11); + locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Tertiary,0); + locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Tertiary,0); + locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Tertiary,0); + locStatement.Elements[3] := TReorderUnit.From(Ord('d'),TReorderWeigthKind.Tertiary,0); + locStatement.Elements[4] := TReorderUnit.From(Ord('e'),TReorderWeigthKind.Tertiary,0); + locStatement.Elements[5] := TReorderUnit.From(Ord('f'),TReorderWeigthKind.Tertiary,0); + locStatement.Elements[6] := TReorderUnit.From(Ord('g'),TReorderWeigthKind.Tertiary,0); + locStatement.Elements[7] := TReorderUnit.From(Ord('p'),TReorderWeigthKind.Tertiary,0); + locStatement.Elements[8] := TReorderUnit.From(Ord('q'),TReorderWeigthKind.Tertiary,0); + locStatement.Elements[9] := TReorderUnit.From(Ord('r'),TReorderWeigthKind.Tertiary,0); + locStatement.Elements[10] := TReorderUnit.From(Ord('s'),TReorderWeigthKind.Tertiary,0); + do_test_parser('&x <<<* abcd-gp-s',locStatement); +end; + +procedure test_parser_abreviating_9(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(3); + locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Identity,0); + locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Identity,0); + locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Identity,0); + do_test_parser('&x =* abc',locStatement); +end; + +procedure test_parser_abreviating_10(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(11); + locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Identity,0); + locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Identity,0); + locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Identity,0); + locStatement.Elements[3] := TReorderUnit.From(Ord('d'),TReorderWeigthKind.Identity,0); + locStatement.Elements[4] := TReorderUnit.From(Ord('e'),TReorderWeigthKind.Identity,0); + locStatement.Elements[5] := TReorderUnit.From(Ord('f'),TReorderWeigthKind.Identity,0); + locStatement.Elements[6] := TReorderUnit.From(Ord('g'),TReorderWeigthKind.Identity,0); + locStatement.Elements[7] := TReorderUnit.From(Ord('p'),TReorderWeigthKind.Identity,0); + locStatement.Elements[8] := TReorderUnit.From(Ord('q'),TReorderWeigthKind.Identity,0); + locStatement.Elements[9] := TReorderUnit.From(Ord('r'),TReorderWeigthKind.Identity,0); + locStatement.Elements[10] := TReorderUnit.From(Ord('s'),TReorderWeigthKind.Identity,0); + do_test_parser('&x =* abcd-gp-s',locStatement); +end; + +procedure test_parser_contraction_1(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('k'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From([Ord('c'),Ord('h')],TReorderWeigthKind.Primary,0); + do_test_parser('&k < ch',locStatement); +end; + +procedure test_parser_contraction_2(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,3); + locStatement.Reset[0] := Ord('a'); + locStatement.Reset[1] := Ord('b'); + locStatement.Reset[2] := Ord('c'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From([Ord('c'),Ord('h')],TReorderWeigthKind.Primary,0); + do_test_parser('&abc < ch',locStatement); +end; + +procedure test_parser_expansion_1(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('a'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From(Ord('z'),TReorderWeigthKind.Primary,0); + locStatement.Elements[0].ExpansionChars := CodePointToArray(Ord('e')); + do_test_parser('&a < z/e',locStatement); +end; + +procedure test_parser_special_char_1(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From(Ord('/'),TReorderWeigthKind.Primary,0); + do_test_parser('&x < ''/''',locStatement); +end; + +procedure test_parser_special_char_2(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From(Ord('&'),TReorderWeigthKind.Primary,0); + do_test_parser('&x < ''&''',locStatement); +end; + +procedure test_parser_special_char_3(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From(Ord('<'),TReorderWeigthKind.Primary,0); + do_test_parser('&x < ''<''',locStatement); +end; + +procedure test_parser_special_char_4(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From(Ord('|'),TReorderWeigthKind.Primary,0); + do_test_parser('&x < ''|''',locStatement); +end; + +procedure test_parser_special_char_5(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From(Ord('*'),TReorderWeigthKind.Primary,0); + do_test_parser('&x < ''*''',locStatement); +end; + +procedure test_parser_special_char_6(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From(Ord('['),TReorderWeigthKind.Primary,0); + do_test_parser('&x < ''[''',locStatement); +end; + +procedure test_parser_special_char_7(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From(Ord(']'),TReorderWeigthKind.Primary,0); + do_test_parser('&x < '']''',locStatement); +end; + +procedure test_parser_skip_comment_1(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0); + do_test_parser( + '&x #' + sLineBreak + + ' < a', + locStatement, 2 + ); +end; + +procedure test_parser_skip_comment_2(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0); + do_test_parser( + '&x # hello' + sLineBreak + + ' < a', + locStatement, 2 + ); +end; + +procedure test_parser_skip_comment_3(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0); + do_test_parser( + '&x # hello' + sLineBreak + + sLineBreak + + #9#9' ' + sLineBreak + + ' < a', + locStatement, 4 + ); +end; + +procedure test_parser_quoted_string_1(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := + TReorderUnit.From( + [Ord('<'),Ord('#'),Ord('|'),Ord('/'),Ord('!')], + TReorderWeigthKind.Primary,0 + ); + do_test_parser('&x < ''<#|/!''',locStatement); +end; + +procedure test_parser_quoted_string_2(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := + TReorderUnit.From( + [Ord('<'),Ord('#'),Ord('|'),Ord('/'),Ord('!'),Ord('A')], + TReorderWeigthKind.Primary,0 + ); + do_test_parser('&x < ''<#|/!''A',locStatement); +end; + +procedure test_parser_quoted_string_3(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := + TReorderUnit.From( + [Ord('<'),Ord('#'),Ord('|'),Ord('/'),Ord('!')], + TReorderWeigthKind.Primary,0 + ); + do_test_parser('&x < ''<#|/!''#',locStatement); +end; + +procedure test_parser_quoted_string_4(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := + TReorderUnit.From( + [Ord('<'),Ord('#'),Ord('|'),Ord('/'),Ord('!'),Ord('A')], + TReorderWeigthKind.Primary,0 + ); + do_test_parser('&x < ''<#|/!''A#',locStatement); +end; + +procedure test_parser_quoted_string_5(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,3); + locStatement.Reset[0] := Ord('x'); + locStatement.Reset[1] := Ord('-'); + locStatement.Reset[2] := Ord('y'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := + TReorderUnit.From(Ord('k'),TReorderWeigthKind.Tertiary,0); + do_test_parser('&''x''-''y''<<<k',locStatement); +end; + +procedure test_parser_quoted_string_6(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := + TReorderUnit.From(Ord('|'),TReorderWeigthKind.Primary,0); + do_test_parser('&x < ''|''',locStatement); +end; + +procedure test_parser_quoted_string_7(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := + TReorderUnit.From([Ord('a'),Ord('|')],TReorderWeigthKind.Primary,0); + do_test_parser('&x < a''|''',locStatement); +end; + +procedure test_parser_quoted_string_8(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := + TReorderUnit.From([Ord('a'),Ord('|'),Ord('c')],TReorderWeigthKind.Primary,0); + do_test_parser('&x < a''|''c',locStatement); +end; + +procedure test_parser_contexte_before_1(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := + TReorderUnit.From(Ord('-'),[Ord('a')],TReorderWeigthKind.Secondary,0); + do_test_parser('&x << a|-',locStatement); +end; + +procedure test_parser_contexte_before_2(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('a'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := + TReorderUnit.From(Ord('-'),[Ord('a')],TReorderWeigthKind.Tertiary,0); + do_test_parser('&a <<< a|-',locStatement); +end; + +procedure test_parser_contexte_before_3(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := + TReorderUnit.From( + Ord('-'),[Ord('a'),Ord('z'),Ord('k')],TReorderWeigthKind.Secondary,0 + ); + do_test_parser('&x << azk|-',locStatement); +end; + +procedure test_parser_contexte_before_4(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(1); + locStatement.Elements[0] := + TReorderUnit.From( + [Ord('a'),Ord(':')],[Ord('a'),Ord('z'),Ord('k')], + TReorderWeigthKind.Secondary,0 + ); + do_test_parser('&x << azk|a:',locStatement); +end; + +procedure test_parser_placement_before_1(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.Before := True; + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From(Ord('k'),TReorderWeigthKind.Secondary,0); + do_test_parser('&[before 2] x << k',locStatement); +end; + +procedure test_parser_placement_before_2(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.Before := True; + locStatement.SetElementCount(1); + locStatement.Elements[0] := + TReorderUnit.From([Ord('z'),Ord('k')],TReorderWeigthKind.Tertiary,0); + do_test_parser('&[before 3] x <<< zk',locStatement); +end; + +procedure test_parser_placement_before_3(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.Before := True; + locStatement.SetElementCount(1); + locStatement.Elements[0] := TReorderUnit.From(Ord('z'),TReorderWeigthKind.Primary,0); + do_test_parser('&[before 1] x < z',locStatement); +end; + +procedure test_parser_multi_unit_statement_line_1(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(3); + locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0); + locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,0); + locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,0); + do_test_parser('&x < a < b < c',locStatement); + do_test_parser('&x <a <b <c',locStatement); + do_test_parser('&x <a<b<c',locStatement); +end; + +procedure test_parser_multi_unit_statement_line_2(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(3); + locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0); + locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Secondary,0); + locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Tertiary,0); + do_test_parser('&x < a << b <<< c',locStatement); + do_test_parser('&x <a <<b <<<c',locStatement); + do_test_parser('&x <a<<b<<<c',locStatement); +end; + +procedure test_parser_multi_unit_statement_line_3(); +var + locStatement : TReorderSequence; +begin + locStatement.Clear(); + SetLength(locStatement.Reset,1); + locStatement.Reset[0] := Ord('x'); + locStatement.SetElementCount(3); + locStatement.Elements[0] := TReorderUnit.From(Ord('a'),TReorderWeigthKind.Tertiary,0); + locStatement.Elements[1] := TReorderUnit.From(Ord('b'),TReorderWeigthKind.Secondary,0); + locStatement.Elements[2] := TReorderUnit.From(Ord('c'),TReorderWeigthKind.Tertiary,0); + do_test_parser('&x <<< a << b <<< c',locStatement); + do_test_parser('&x <<<a <<b <<<c',locStatement); + do_test_parser('&x <<<a<<b<<<c',locStatement); +end; + +procedure test_parser_multi_statement_line_1(); +const STATEMENT_BUFFER : UTF8String = '&r <<a &s <<< b'; +var + locStatements : array of TReorderSequence; + locStatement : PReorderSequence; + locExpectedStatement : TReorderSequence; + lineCount, i, bufferLength, k, nextPost : Integer; + buffer : PAnsiChar; +begin + buffer := @STATEMENT_BUFFER[1]; + WriteLn('Parsing "',buffer,'" ...'); + bufferLength := Length(buffer); + SetLength(locStatements,10); + lineCount := 0; + nextPost := 0; + i := 0; + k := 0; + while (i < bufferLength) do begin + locStatement := @locStatements[k]; + locStatement^.Clear(); + if not ParseStatement(buffer,i,bufferLength,locStatement,nextPost,lineCount) then + Break; + i := nextPost; + k := k+1; + if (k > 2) then + raise Exception.Create('2 Statements expected, more was parsed.'); + end; + Check((k=2), 'Statement Count'); + + locExpectedStatement.Clear(); + SetLength(locExpectedStatement.Reset,1); + locExpectedStatement.Reset[0] := Ord('r'); + locExpectedStatement.SetElementCount(1); + locExpectedStatement.Elements[0] := + TReorderUnit.From(Ord('a'),TReorderWeigthKind.Secondary,0); + CheckEqual(locStatements[0],locExpectedStatement); + + locExpectedStatement.Clear(); + SetLength(locExpectedStatement.Reset,1); + locExpectedStatement.Reset[0] := Ord('s'); + locExpectedStatement.SetElementCount(1); + locExpectedStatement.Elements[0] := + TReorderUnit.From(Ord('b'),TReorderWeigthKind.Tertiary,0); + CheckEqual(locStatements[1],locExpectedStatement); + + WriteLn(' -- test ok'); +end; + +procedure test_parser_multi_statement_line_2(); +const STATEMENT_BUFFER : UTF8String = '&r <a <b <<B &s <<< b <c'; +var + locStatements : array of TReorderSequence; + locStatement : PReorderSequence; + locExpectedStatement : TReorderSequence; + lineCount, i, bufferLength, k, nextPost : Integer; + buffer : PAnsiChar; +begin + buffer := @STATEMENT_BUFFER[1]; + WriteLn('Parsing "',buffer,'" ...'); + bufferLength := Length(buffer); + SetLength(locStatements,10); + lineCount := 0; + nextPost := 0; + i := 0; + k := 0; + while (i < bufferLength) do begin + locStatement := @locStatements[k]; + locStatement^.Clear(); + if not ParseStatement(buffer,i,bufferLength,locStatement,nextPost,lineCount) then + Break; + i := nextPost; + k := k+1; + if (k > 2) then + raise Exception.Create('2 Statements expected, more was parsed.'); + end; + Check((k=2), 'Statement Count'); + + locExpectedStatement.Clear(); + SetLength(locExpectedStatement.Reset,1); + locExpectedStatement.Reset[0] := Ord('r'); + locExpectedStatement.SetElementCount(3); + locExpectedStatement.Elements[0] := + TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0); + locExpectedStatement.Elements[1] := + TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,0); + locExpectedStatement.Elements[2] := + TReorderUnit.From(Ord('B'),TReorderWeigthKind.Secondary,0); + CheckEqual(locStatements[0],locExpectedStatement); + + locExpectedStatement.Clear(); + SetLength(locExpectedStatement.Reset,1); + locExpectedStatement.Reset[0] := Ord('s'); + locExpectedStatement.SetElementCount(2); + locExpectedStatement.Elements[0] := + TReorderUnit.From(Ord('b'),TReorderWeigthKind.Tertiary,0); + locExpectedStatement.Elements[1] := + TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,0); + CheckEqual(locStatements[1],locExpectedStatement); + + WriteLn(' -- test ok'); +end; + +procedure test_parser_multi_statement_line_3(); +const STATEMENT_BUFFER : UTF8String = '&r <a <b <<B &s <<< b <c &x <A <W'; +var + locStatements : array of TReorderSequence; + locStatement : PReorderSequence; + locExpectedStatement : TReorderSequence; + lineCount, i, bufferLength, k, nextPost : Integer; + buffer : PAnsiChar; +begin + buffer := @STATEMENT_BUFFER[1]; + WriteLn('Parsing "',buffer,'" ...'); + bufferLength := Length(buffer); + SetLength(locStatements,10); + lineCount := 0; + nextPost := 0; + i := 0; + k := 0; + while (i < bufferLength) do begin + locStatement := @locStatements[k]; + locStatement^.Clear(); + if not ParseStatement(buffer,i,bufferLength,locStatement,nextPost,lineCount) then + Break; + i := nextPost; + k := k+1; + if (k > 3) then + raise Exception.Create('3 Statements expected, more was parsed.'); + end; + Check((k=3), 'Statement Count'); + + locExpectedStatement.Clear(); + SetLength(locExpectedStatement.Reset,1); + locExpectedStatement.Reset[0] := Ord('r'); + locExpectedStatement.SetElementCount(3); + locExpectedStatement.Elements[0] := + TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0); + locExpectedStatement.Elements[1] := + TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,0); + locExpectedStatement.Elements[2] := + TReorderUnit.From(Ord('B'),TReorderWeigthKind.Secondary,0); + CheckEqual(locStatements[0],locExpectedStatement); + + locExpectedStatement.Clear(); + SetLength(locExpectedStatement.Reset,1); + locExpectedStatement.Reset[0] := Ord('s'); + locExpectedStatement.SetElementCount(2); + locExpectedStatement.Elements[0] := + TReorderUnit.From(Ord('b'),TReorderWeigthKind.Tertiary,0); + locExpectedStatement.Elements[1] := + TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,0); + CheckEqual(locStatements[1],locExpectedStatement); + + locExpectedStatement.Clear(); + SetLength(locExpectedStatement.Reset,1); + locExpectedStatement.Reset[0] := Ord('x'); + locExpectedStatement.SetElementCount(2); + locExpectedStatement.Elements[0] := + TReorderUnit.From(Ord('A'),TReorderWeigthKind.Primary,0); + locExpectedStatement.Elements[1] := + TReorderUnit.From(Ord('W'),TReorderWeigthKind.Primary,0); + CheckEqual(locStatements[2],locExpectedStatement); + + WriteLn(' -- test ok'); +end; + +procedure test_parser_multi_statement_line_4(); +const STATEMENT_BUFFER : UTF8String = + ' &r <a <b <<B &s <<< b <c &x <A <W'; +var + locStatements : array of TReorderSequence; + locStatement : PReorderSequence; + locExpectedStatement : TReorderSequence; + lineCount, i, bufferLength, k, nextPost : Integer; + buffer : PAnsiChar; +begin + buffer := @STATEMENT_BUFFER[1]; + WriteLn('Parsing "',buffer,'" ...'); + bufferLength := Length(buffer); + SetLength(locStatements,10); + lineCount := 0; + nextPost := 0; + i := 0; + k := 0; + while (i < bufferLength) do begin + locStatement := @locStatements[k]; + locStatement^.Clear(); + if not ParseStatement(buffer,i,bufferLength,locStatement,nextPost,lineCount) then + Break; + i := nextPost; + k := k+1; + if (k > 3) then + raise Exception.Create('3 Statements expected, more was parsed.'); + end; + Check((k=3), 'Statement Count'); + + locExpectedStatement.Clear(); + SetLength(locExpectedStatement.Reset,1); + locExpectedStatement.Reset[0] := Ord('r'); + locExpectedStatement.SetElementCount(3); + locExpectedStatement.Elements[0] := + TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0); + locExpectedStatement.Elements[1] := + TReorderUnit.From(Ord('b'),TReorderWeigthKind.Primary,0); + locExpectedStatement.Elements[2] := + TReorderUnit.From(Ord('B'),TReorderWeigthKind.Secondary,0); + CheckEqual(locStatements[0],locExpectedStatement); + + locExpectedStatement.Clear(); + SetLength(locExpectedStatement.Reset,1); + locExpectedStatement.Reset[0] := Ord('s'); + locExpectedStatement.SetElementCount(2); + locExpectedStatement.Elements[0] := + TReorderUnit.From(Ord('b'),TReorderWeigthKind.Tertiary,0); + locExpectedStatement.Elements[1] := + TReorderUnit.From(Ord('c'),TReorderWeigthKind.Primary,0); + CheckEqual(locStatements[1],locExpectedStatement); + + locExpectedStatement.Clear(); + SetLength(locExpectedStatement.Reset,1); + locExpectedStatement.Reset[0] := Ord('x'); + locExpectedStatement.SetElementCount(2); + locExpectedStatement.Elements[0] := + TReorderUnit.From(Ord('A'),TReorderWeigthKind.Primary,0); + locExpectedStatement.Elements[1] := + TReorderUnit.From(Ord('W'),TReorderWeigthKind.Primary,0); + CheckEqual(locStatements[2],locExpectedStatement); + + WriteLn(' -- test ok'); +end; + +procedure test_parser_multi_line_statements_1(); +const STATEMENT_BUFFER : UTF8String = + '&r <a #123'#10 + + '&s <<< b '; +var + locStatements : array of TReorderSequence; + locStatement : PReorderSequence; + locExpectedStatement : TReorderSequence; + lineCount, i, bufferLength, k, nextPost : Integer; + buffer : PAnsiChar; +begin + buffer := @STATEMENT_BUFFER[1]; + WriteLn('Parsing "',buffer,'" ...'); + bufferLength := Length(buffer); + SetLength(locStatements,10); + lineCount := 0; + nextPost := 0; + i := 0; + k := 0; + while (i < bufferLength) do begin + locStatement := @locStatements[k]; + locStatement^.Clear(); + if not ParseStatement(buffer,i,bufferLength,locStatement,nextPost,lineCount) then + Break; + i := nextPost; + k := k+1; + if (k > 2) then + raise Exception.Create('2 Statements expected, more was parsed.'); + end; + Check((k=2), 'Statement Count'); + + locExpectedStatement.Clear(); + SetLength(locExpectedStatement.Reset,1); + locExpectedStatement.Reset[0] := Ord('r'); + locExpectedStatement.SetElementCount(1); + locExpectedStatement.Elements[0] := + TReorderUnit.From(Ord('a'),TReorderWeigthKind.Primary,0); + CheckEqual(locStatements[0],locExpectedStatement); + + locExpectedStatement.Clear(); + SetLength(locExpectedStatement.Reset,1); + locExpectedStatement.Reset[0] := Ord('s'); + locExpectedStatement.SetElementCount(1); + locExpectedStatement.Elements[0] := + TReorderUnit.From(Ord('b'),TReorderWeigthKind.Tertiary,0); + CheckEqual(locStatements[1],locExpectedStatement); + + WriteLn(' -- test ok'); +end; + +//----------------------------------------------------------------------------// +const + UNICODE_LINE_BREAK = #10; + COLLATION_XML_TEXT = + '<ldml>' + UNICODE_LINE_BREAK + + ' <identity>' + UNICODE_LINE_BREAK + + ' <version number="1.2.3"/>' + UNICODE_LINE_BREAK + + ' <generation date="$Date: 2014-07-08 21:39:31 -0500 (Tue, 08 Jul 2014) $"/>' + UNICODE_LINE_BREAK + + ' <language type="xy" />' + UNICODE_LINE_BREAK + + ' </identity>' + UNICODE_LINE_BREAK + + ' <collations >' + UNICODE_LINE_BREAK + + ' <defaultCollation>one</defaultCollation>' + UNICODE_LINE_BREAK + + ' <collation type="abc" >' + UNICODE_LINE_BREAK + + ' <import source="xy" type="private-two"/>' + UNICODE_LINE_BREAK + + ' <import source="xy" type="one"/>' + UNICODE_LINE_BREAK + + ' <suppress_contractions>[qh]</suppress_contractions>' + UNICODE_LINE_BREAK + + ' <cr><![CDATA[' + UNICODE_LINE_BREAK + + ' &w<u<v' + UNICODE_LINE_BREAK + + ' ]]></cr>' + UNICODE_LINE_BREAK + + ' </collation>' + UNICODE_LINE_BREAK + + ' <collation type="one" >' + UNICODE_LINE_BREAK + + ' <cr><![CDATA[' + UNICODE_LINE_BREAK + + ' &h<z<b' + UNICODE_LINE_BREAK + + ' ]]></cr>' + UNICODE_LINE_BREAK + + ' </collation>' + UNICODE_LINE_BREAK + + ' <collation type="private-two" >' + UNICODE_LINE_BREAK + + ' <cr><![CDATA[' + UNICODE_LINE_BREAK + + ' &f<c<<<ce' + UNICODE_LINE_BREAK + + ' &q<qh<<<p' + UNICODE_LINE_BREAK + + ' ]]></cr>' + UNICODE_LINE_BREAK + + ' </collation >' + UNICODE_LINE_BREAK + + ' <collation type="three" >' + UNICODE_LINE_BREAK + + ' <cr><![CDATA[' + UNICODE_LINE_BREAK + + ' &d<c<b<a' + UNICODE_LINE_BREAK + + ' ]]></cr>' + UNICODE_LINE_BREAK + + ' </collation>' + UNICODE_LINE_BREAK + + ' </collations>' + UNICODE_LINE_BREAK + + '</ldml>'; + COLLATION_XML_TEXT2 = + '<ldml>' + UNICODE_LINE_BREAK + + ' <identity>' + UNICODE_LINE_BREAK + + ' <version number="1.2.3"/>' + UNICODE_LINE_BREAK + + ' <generation date="$Date: 2014-07-08 21:39:31 -0500 (Tue, 08 Jul 2014) $"/>' + UNICODE_LINE_BREAK + + ' <language type="kw" />' + UNICODE_LINE_BREAK + + ' </identity>' + UNICODE_LINE_BREAK + + ' <collations >' + UNICODE_LINE_BREAK + + ' <defaultCollation>wend</defaultCollation>' + UNICODE_LINE_BREAK + + ' <collation type="kis" >' + UNICODE_LINE_BREAK + + ' <cr><![CDATA[' + UNICODE_LINE_BREAK + + ' &x<c<v' + UNICODE_LINE_BREAK + + ' ]]></cr>' + UNICODE_LINE_BREAK + + ' </collation>' + UNICODE_LINE_BREAK + + ' <collation type="wend" >' + UNICODE_LINE_BREAK + + ' <import source="xy" type="one"/>' + UNICODE_LINE_BREAK + + ' <cr><![CDATA[' + UNICODE_LINE_BREAK + + ' &F<<P<<<C' + UNICODE_LINE_BREAK + + ' &L<a<<<Z' + UNICODE_LINE_BREAK + + ' ]]></cr>' + UNICODE_LINE_BREAK + + ' </collation>' + UNICODE_LINE_BREAK + + ' </collations>' + UNICODE_LINE_BREAK + + '</ldml>'; + +function PrepareCollationStream(const AText : string) : TStream; +begin + Result := TMemoryStream.Create(); + if (AText <> '') then + Result.Write(AText[1],(Length(AText)*SizeOf(Char))); +end; + +function PrepareRepositoryLoader() : ICldrCollationLoader; +var + s : array of TStream; +begin + SetLength(s,2); + s[0] := PrepareCollationStream(COLLATION_XML_TEXT); + s[1] := PrepareCollationStream(COLLATION_XML_TEXT2); + Result := TCldrCollationStreamLoader.Create(['xy','kw'],s) as ICldrCollationLoader; +end; + +procedure test_collation_parser_HeaderParsing(); +var + rep : TCldrCollationRepository; + col : TCldrCollation; + typ : TCldrCollationItem; + imp : TCldrImport; +begin + rep := TCldrCollationRepository.Create(PrepareRepositoryLoader()); + try + Check(rep.Find('xy')=nil, 'Find() before load.'); + Check(rep.Find('ab')=nil, 'Find() before load.'); + col := rep.Load('xy',TCldrParserMode.HeaderParsing); + Check(col <> nil, 'load()'); + Check(col.Mode=TCldrParserMode.HeaderParsing, 'Mode'); + Check(rep.Find('xy') <> nil, 'Find() after load.'); + Check(rep.Find('ab')=nil); + WriteLn(' - Step 1 ok'); + + Check(col.DefaultType='one', 'DefaultType'); + Check(col.ItemCount=4, 'col.ItemCount'); + Check(col.Find('one')<>nil, 'col.Find()'); + Check(col.Find('private-two')<>nil, 'col.Find()'); + Check(col.Find('three')<>nil, 'col.Find()'); + Check(col.Find('abc')<>nil, 'col.Find()'); + + WriteLn(' - Step 2 ok'); + + typ := col.Find('private-two'); + check(typ.IsPrivate(),'IsPrivate()'); + + WriteLn(' - Step 3 ok'); + + Check(col.Find('one').Imports.Count=0, 'one.imports=0'); + Check(col.Find('private-two').Imports.Count=0, 'private-two.imports=0'); + Check(col.Find('three').Imports.Count=0, 'three.imports=0'); + + WriteLn(' - Step 4 ok'); + + typ := col.Find('abc'); + check(typ.Imports.Count=2,'abc.imports=2'); + imp := typ.Imports[0]; + check(imp<>nil, 'abc.Imports[0]'); + check( + (imp.Source = 'xy') and (imp.TypeName = 'private-two'), + 'abc.Imports[0]' + ); + imp := typ.Imports[1]; + check(imp<>nil, 'abc.Imports[1]'); + check( + (imp.Source = 'xy') and (imp.TypeName = 'one'), + 'abc.Imports[1]' + ); + + WriteLn(' - Step 5 ok'); + finally + rep.Free(); + end; + + WriteLn(' -- test ok'); +end; + +procedure test_collation_parser_HeaderParsing_2(); +var + rep : TCldrCollationRepository; + col : TCldrCollation; + typ : TCldrCollationItem; + imp : TCldrImport; +begin + rep := TCldrCollationRepository.Create(PrepareRepositoryLoader()); + try + Check(rep.Find('kw')=nil, 'Find() before load.'); + Check(rep.Find('xy')=nil, 'Find() before load.'); + col := rep.Load('kw',TCldrParserMode.HeaderParsing); + Check(col <> nil, 'load()'); + Check(col.Mode=TCldrParserMode.HeaderParsing, 'Mode'); + Check(rep.Find('kw') <> nil, 'Find() after load.'); + WriteLn(' - Step 1 ok'); + + Check(rep.Find('xy')=nil, 'Find(xy) after load.'); + WriteLn(' - Step 2 ok'); + + typ := col.Find('wend'); + check(typ.Imports.Count=1,'wend.imports=1'); + imp := typ.Imports[0]; + check(imp<>nil, 'wend.Imports[0]'); + check( + (imp.Source = 'xy') and (imp.TypeName = 'one'), + 'wend.Imports[0]' + ); + + WriteLn(' - Step 3 ok'); + finally + rep.Free(); + end; + + WriteLn(' -- test ok'); +end; + +function ParseSingleStatement( + const AText : UnicodeString; + AStatement : PReorderSequence +) : Boolean; +var + np, lc : Integer; + u8 : UTF8String; +begin + u8 := UTF8Encode(AText); + np := 0; + lc := 0; + Result := ParseStatement(@u8[1],0,Length(u8),AStatement,np,lc); +end; + +function ParseMultiStatements( + AText : UnicodeString; + AStatementList : PReorderSequence; + const AListLength : Integer +) : Integer; +var + c, nextPos, lineCount, i : Integer; + u8 : UTF8String; + buffer : PAnsiChar; + statement, lastStatement : PReorderSequence; +begin + u8 := UTF8Encode(AText); + c := Length(u8); + buffer := @u8[1]; + nextPos := 0; + i := 0; + lineCount := 0; + statement := AStatementList; + lastStatement := AStatementList+AListLength; + while (i < c) and (statement < lastStatement) do begin + statement^.Clear(); + if not ParseStatement(buffer,i,c,statement,nextPos,lineCount) then + Break; + i := nextPos; + Inc(statement); + end; + Result := statement-AStatementList; +end; + +type + TReorderSequenceArrayRec = record + Data : TReorderSequenceArray; + ActualLengh : Integer; + end; + PReorderSequenceArrayRec = ^TReorderSequenceArrayRec; + +function CopyVisitorFunc( + ARule : PReorderSequence; + AOwner : TCldrCollationItem; + AData : Pointer +) : Boolean; +var + p : PReorderSequenceArrayRec; +begin + p := PReorderSequenceArrayRec(AData); + Result := (p^.ActualLengh < Length(p^.Data)); + if Result then begin + p^.Data[p^.ActualLengh].Assign(ARule); + p^.ActualLengh := p^.ActualLengh+1; + end; +end; + +procedure test_collation_parser_FullParsing(); +var + rep : TCldrCollationRepository; + col : TCldrCollation; + typ : TCldrCollationItem; + imp : TCldrImport; + locStatement : TReorderSequence; + locStatementList : TReorderSequenceArray; + c, i : Integer; +begin + rep := TCldrCollationRepository.Create(PrepareRepositoryLoader()); + try + Check(rep.Find('xy')=nil, 'Find() before load.'); + Check(rep.Find('ab')=nil, 'Find() before load.'); + col := rep.Load('xy',TCldrParserMode.FullParsing); + Check(col <> nil, 'load()'); + Check(col.Mode=TCldrParserMode.FullParsing, 'Mode'); + Check(rep.Find('xy') <> nil, 'Find() after load.'); + Check(rep.Find('ab')=nil); + WriteLn(' - Step 1 ok'); + + Check(col.DefaultType='one', 'DefaultType'); + Check(col.ItemCount=4, 'col.ItemCount'); + Check(col.Find('one')<>nil, 'col.Find()'); + Check(col.Find('private-two')<>nil, 'col.Find()'); + Check(col.Find('three')<>nil, 'col.Find()'); + Check(col.Find('abc')<>nil, 'col.Find()'); + + WriteLn(' - Step 2 ok'); + + typ := col.Find('private-two'); + check(typ.IsPrivate(),'IsPrivate()'); + + WriteLn(' - Step 3 ok'); + + Check(col.Find('one').Imports.Count=0, 'one.imports=0'); + Check(col.Find('private-two').Imports.Count=0, 'private-two.imports=0'); + Check(col.Find('three').Imports.Count=0, 'three.imports=0'); + + WriteLn(' - Step 4 ok'); + + typ := col.Find('abc'); + check(typ.Imports.Count=2,'abc.imports=2'); + imp := typ.Imports[0]; + check(imp<>nil, 'abc.Imports[0]'); + check( + (imp.Source = 'xy') and (imp.TypeName = 'private-two'), + 'abc.Imports[0]' + ); + imp := typ.Imports[1]; + check(imp<>nil, 'abc.Imports[1]'); + check( + (imp.Source = 'xy') and (imp.TypeName = 'one'), + 'abc.Imports[1]' + ); + Check(Length(typ.Rules)=2,'Length(abc.Rules)=2'); + Check(Length(typ.Rules[0].Elements)=2,'Length(typ.Rules[0].Elements)=2'); + Check(typ.Rules[0].Elements[0].WeigthKind=TReorderWeigthKind.Deletion,'typ.Rules[0].Elements[0].WeigthKind=TReorderWeigthKind.Deletion'); + Check(Length(typ.Rules[0].Elements[0].Characters)=1,'Length(typ.Rules[0].Elements[0].Characters)=1'); + Check(typ.Rules[0].Elements[0].Characters[0]=Ord('h'),'typ.Rules[0].Elements[0].Characters[0]=h'); + Check(typ.Rules[0].Elements[1].Characters[0]=Ord('q'),'typ.Rules[0].Elements[1].Characters[0]=q'); + WriteLn(' - Step 5 ok'); + + typ := col.Find('one'); + Check(Length(typ.Rules)>0, 'one.Rules <> nil'); + locStatement.Clear(); + Check(ParseSingleStatement('&h<z<b',@locStatement)); + CheckEqual(locStatement,typ.Rules[0]); + WriteLn(' - Step 6 ok'); + + typ := col.Find('private-two'); + Check(Length(typ.Rules)>0, 'private-two.Rules <> nil'); + c := 2; + SetLength(locStatementList,5); + Check( + ParseMultiStatements( + '&f<c<<<ce' + UNICODE_LINE_BREAK+'&q<qh<<<p ', + @locStatementList[0], + Length(locStatementList) + ) = c + ); + for i := 0 to c-1 do + CheckEqual(locStatementList[i],typ.Rules[i]); + WriteLn(' - Step 7 ok'); + + typ := col.Find('three'); + Check(Length(typ.Rules)>0, 'three.Rules <> nil'); + locStatement.Clear(); + Check(ParseSingleStatement('&d<c<b<a',@locStatement)); + CheckEqual(locStatement,typ.Rules[0]); + WriteLn(' - Step 8 ok'); + finally + rep.Free(); + end; + + WriteLn(' -- test ok'); +end; + +procedure test_collation_parser_FullParsing_2(); +var + rep : TCldrCollationRepository; + col : TCldrCollation; + typ : TCldrCollationItem; + imp : TCldrImport; + locStatementList : TReorderSequenceArray; + c, i : Integer; +begin + rep := TCldrCollationRepository.Create(PrepareRepositoryLoader()); + try + Check(rep.Find('kw')=nil, 'Find() before load.'); + Check(rep.Find('xy')=nil, 'Find() before load.'); + col := rep.Load('kw',TCldrParserMode.FullParsing); + Check(col <> nil, 'load()'); + Check(col.Mode=TCldrParserMode.FullParsing, 'Mode'); + Check(rep.Find('kw') <> nil, 'Find() after load.'); + WriteLn(' - Step 1 ok'); + + typ := col.Find('wend'); + check(typ.Imports.Count=1,'wend.imports=1'); + imp := typ.Imports[0]; + check(imp<>nil, 'wend.Imports[0]'); + check( + (imp.Source = 'xy') and (imp.TypeName = 'one'), + 'wend.Imports[0]' + ); + Check(Length(typ.Rules)>0, 'wend.Rules <> nil'); + c := 2; + SetLength(locStatementList,5); + Check( + ParseMultiStatements( + '&F<<P<<<C' + UNICODE_LINE_BREAK+'&L<a<<<Z ', + @locStatementList[0], + Length(locStatementList) + ) = c + ); + for i := 0 to c-1 do + CheckEqual(locStatementList[i],typ.Rules[i]); + WriteLn(' - Step 2 ok'); + + finally + rep.Free(); + end; + + WriteLn(' -- test ok'); +end; + +procedure test_collation_parser_complete_rules(); +var + rep : TCldrCollationRepository; + col : TCldrCollation; + typ, xtyp : TCldrCollationItem; + c, i : Integer; + locData : TReorderSequenceArrayRec; +begin + rep := TCldrCollationRepository.Create(PrepareRepositoryLoader()); + try + col := rep.Load('xy',TCldrParserMode.FullParsing); + SetLength(locData.Data,23); + + typ := col.Find('one'); + locData.ActualLengh := 0; + Check(ForEachRule(typ,@CopyVisitorFunc,@locData), 'ForEachRule(one) - 1'); + Check(locData.ActualLengh = 1, 'ForEachRule(one) - 2'); + CheckEqual(locData.Data[0],typ.Rules[0]); + WriteLn(' - Step 1 ok'); + + typ := col.Find('private-two'); + locData.ActualLengh := 0; + Check(ForEachRule(typ,@CopyVisitorFunc,@locData), 'ForEachRule(private-two) - 1'); + Check(locData.ActualLengh = 2, 'ForEachRule(private-two) - 2'); + for i := 0 to locData.ActualLengh-1 do + CheckEqual(locData.Data[i],typ.Rules[i]); + WriteLn(' - Step 2 ok'); + + typ := col.Find('abc'); + locData.ActualLengh := 0; + SetLength(locData.Data,23); + Check(ForEachRule(typ,@CopyVisitorFunc,@locData), 'ForEachRule(abc) - 1'); + Check(locData.ActualLengh = 2+2{private-two}+1{one}, 'ForEachRule(abc) - 2'); + xtyp := col.Find('private-two'); + c := 0; + for i := 0 to Length(xtyp.Rules)-1 do + CheckEqual(locData.Data[c+i],xtyp.Rules[i]); + c := c+Length(xtyp.Rules); + xtyp := col.Find('one'); + for i := 0 to Length(xtyp.Rules)-1 do + CheckEqual(locData.Data[c+i],xtyp.Rules[i]); + c := c+Length(xtyp.Rules); + for i := 0 to Length(typ.Rules)-1 do + CheckEqual(locData.Data[c+i],typ.Rules[i]); + WriteLn(' - Step 2 ok'); + finally + rep.Free(); + end; + + WriteLn(' -- test ok'); +end; + +procedure test_collation_parser_complete_rules_2(); +var + rep : TCldrCollationRepository; + col, xcol : TCldrCollation; + typ, xtyp : TCldrCollationItem; + locData : TReorderSequenceArrayRec; + c, i : Integer; +begin + rep := TCldrCollationRepository.Create(PrepareRepositoryLoader()); + try + col := rep.Load('kw',TCldrParserMode.FullParsing); + SetLength(locData.Data,23); + + typ := col.Find('wend'); + locData.ActualLengh := 0; + Check(ForEachRule(typ,@CopyVisitorFunc,@locData), 'ForEachRule(wend) - 1'); + Check(locData.ActualLengh = 2+1{one}, 'ForEachRule(wend) - 2'); + xcol := rep.Find('xy'); + Check(xcol <> nil); + xtyp := xcol.Find('one'); + Check(xtyp <> nil); + Check(Length(xtyp.Rules)=1); + c := 0; + for i := 0 to Length(xtyp.Rules)-1 do + CheckEqual(locData.Data[c+i],xtyp.Rules[i]); + c := c+Length(xtyp.Rules); + for i := 0 to Length(typ.Rules)-1 do + CheckEqual(locData.Data[c+i],typ.Rules[i]); + WriteLn(' - Step 1 ok'); + + finally + rep.Free(); + end; + + WriteLn(' -- test ok'); +end; + +procedure test_unicode_set_1(); +var + x : TUnicodeSet; + i : Integer; + s : string; +begin + x := TUnicodeSet.Create(); + try + for i := 0 to 256-1 do + Check(not x.Contains(AnsiChar(i))); + WriteLn(' - Stept 1 ok'); + + s := 'azerty'; + x.AddPattern(Format('[%s]',[s])); + for i := 1 to Length(s) do + Check(x.Contains(s[i])); + WriteLn(' - Stept 2 ok'); + finally + x.Free(); + end; + + WriteLn(' -- test ok'); +end; + +procedure test_unicode_set_2(); +var + x : TUnicodeSet; + i : Integer; +begin + x := TUnicodeSet.Create(); + try + x.AddPattern('[d-h]'); + for i := Ord('d') to Ord('h') do + Check(x.Contains(Char(i))); + WriteLn(' - Stept 1 ok'); + + for i := Ord('a') to Ord('d')-1 do + Check(not x.Contains(Char(i))); + WriteLn(' - Stept 2 ok'); + + for i := Ord('h')+1 to 256-1 do + Check(not x.Contains(Char(i))); + WriteLn(' - Stept 3 ok'); + finally + x.Free(); + end; + + WriteLn(' -- test ok'); +end; + +procedure test_unicode_set_3(); +var + x : TUnicodeSet; + s, s1 : string; +begin + x := TUnicodeSet.Create(); + try + s := 'azerty'; + x.AddPattern(Format('[{%s}]',[s])); + Check(x.Contains(s)); + WriteLn(' - Stept 1 ok'); + + Check(not x.Contains(s+'12')); + WriteLn(' - Stept 2 ok'); + + Check(not x.Contains('qs'+s)); + WriteLn(' - Stept 3 ok'); + + s1 := s+'x'; + x.AddPattern(Format('[{%s}]',[s1])); + Check(x.Contains(s)); + Check(x.Contains(s1)); + WriteLn(' - Stept 4 ok'); + finally + x.Free(); + end; + + WriteLn(' -- test ok'); +end; + end. diff --git a/utils/unicode/cldrtxt.pas b/utils/unicode/cldrtxt.pas new file mode 100644 index 0000000000..44dd796adc --- /dev/null +++ b/utils/unicode/cldrtxt.pas @@ -0,0 +1,687 @@ +{ Parser of the CLDR collation tailoring files. + This parser handle the textual syntax for CLDR version > 23 + + Copyright (c) 2014,2015 by Inoussa OUEDRAOGO + + The source code is distributed under the Library GNU + General Public License with the following modification: + + - object files and libraries linked into an application may be + distributed without source code. + + If you didn't receive a copy of the file COPYING, contact: + Free Software Foundation + 675 Mass Ave + Cambridge, MA 02139 + USA + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +} +unit cldrtxt; + +{$mode objfpc}{$H+} +{$TypedAddress on} + +interface + +uses + Classes, SysUtils, + cldrhelper, helper; + + procedure ParseInitialDocument(ASequence : POrderedCharacters; ADoc : TCustomMemoryStream);overload; + procedure ParseInitialDocument(ASequence : POrderedCharacters; AFileName : string);overload; + + function ParseStatement( + AData : PAnsiChar; + AStartPosition, + AMaxLen : Integer; + AStatement : PReorderSequence; + var ANextPos, + ALineCount : Integer + ) : Boolean; + +implementation +uses + unicodedata; + +const + s_BEFORE = 'before'; + +function String2UnicodeCodePointArray(const AValue : UTF8String): TUnicodeCodePointArray; +var + u4str : UCS4String; + k : Integer; +begin + if (Length(AValue) = 0) then + exit(nil); + if (Length(AValue) = 1) then begin + SetLength(Result,1); + Result[0] := Ord(AValue[1]) + end else begin + u4str := UnicodeStringToUCS4String(UTF8Decode(AValue)); + k := Length(u4str) - 1; // remove the last #0 + SetLength(Result,k); + for k := 0 to k - 1 do + Result[k] := u4str[k]; + end; +end; + +function TryStringToReorderWeigthKind( + const AStr : UTF8String; + out AResult : TReorderWeigthKind +) : Boolean; +begin + Result := True; + if (AStr = '=') then + AResult := TReorderWeigthKind.Identity + else if (AStr = '<') or (AStr = '>') then + AResult := TReorderWeigthKind.Primary + else if (AStr = '<<') or (AStr = '>>') then + AResult := TReorderWeigthKind.Secondary + else if (AStr = '<<<') or (AStr = '>>>') then + AResult := TReorderWeigthKind.Tertiary + else begin + AResult := TReorderWeigthKind.Identity; + Result := False; + end; +end; + +function ParseStatement( + AData : PAnsiChar; + AStartPosition, + AMaxLen : Integer; + AStatement : PReorderSequence; + var ANextPos, + ALineCount : Integer +) : Boolean; +const + LINE_LENGTH = 1024; +var + p : PAnsiChar; + bufferLength, bufferPos, lineLength, linePos, lineIndex : Integer; + line : UTF8String; + statement : PReorderSequence; + elementActualCount : Integer; + specialChararter : Boolean; + historyItemIndex : Integer; + historyItems : array[0..31] of record + p : PAnsiChar; + bufferLength, + bufferPos, + lineLength, + linePos, + lineIndex : Integer; + line : UTF8String; + end; + + procedure SaveState(); + begin + if (historyItemIndex >= High(historyItems)) then + raise Exception.Create('History buffer is full.'); + historyItemIndex := historyItemIndex+1; + historyItems[historyItemIndex].p := p; + historyItems[historyItemIndex].bufferLength := bufferLength; + historyItems[historyItemIndex].bufferPos := bufferPos; + historyItems[historyItemIndex].lineLength := lineLength; + historyItems[historyItemIndex].linePos := linePos; + historyItems[historyItemIndex].lineIndex := lineIndex; + historyItems[historyItemIndex].line := line; + end; + + procedure RestoreState(); + begin + if (historyItemIndex < 0) then + raise Exception.Create('History buffer is empty.'); + p := historyItems[historyItemIndex].p; + bufferLength := historyItems[historyItemIndex].bufferLength; + bufferPos := historyItems[historyItemIndex].bufferPos; + lineLength := historyItems[historyItemIndex].lineLength; + linePos := historyItems[historyItemIndex].linePos; + lineIndex := historyItems[historyItemIndex].lineIndex; + line := historyItems[historyItemIndex].line; + historyItemIndex := historyItemIndex-1; + end; + + procedure DiscardState(); + begin + if (historyItemIndex < 0) then + raise Exception.Create('History buffer is empty.'); + historyItemIndex := historyItemIndex-1; + end; + + function CurrentLine() : UTF8String; inline; + begin + Result := Copy(line,1,lineLength); + end; + + function NextLine() : Boolean; + var + locOldPos : Integer; + locOldPointer : PAnsiChar; + begin + Result := False; + if (p^ = #10) then begin + Inc(p); + Inc(bufferPos); + end; + locOldPos := bufferPos; + locOldPointer := p; + while (bufferPos < bufferLength) and (p^ <> #10) do begin + Inc(p); + Inc(bufferPos); + end; + if (locOldPos = bufferPos) and (p^ = #10) then begin + lineLength := 0; + Inc(p); + Inc(bufferPos); + linePos := 1; + Result := True; + end else if (locOldPos < bufferPos) then begin + lineLength := (bufferPos - locOldPos); + if (lineLength >= Length(line)) then + SetLength(line,(2*lineLength)); + Move(locOldPointer^,line[1],lineLength); + {if (p^ = #10) then begin + //Dec(lineLength); + Inc(p); + Inc(bufferPos); + end;} + linePos := 1; + Result := True; + end; + if Result and (locOldPos < bufferPos) then + lineIndex := lineIndex+1; + end; + + procedure CheckLineLength(const ALength : Integer); + begin + if (ALength > lineLength) then + raise Exception.CreateFmt('Unexpected end of line : "%s".',[CurrentLine()]); + end; + + function ReadChar(out AResult : UTF8String) : Boolean; + var + k : Integer; + us : UnicodeString; + begin + AResult := ''; + Result := False; + if (linePos > lineLength) then + exit; + {if CharInSet(line[linePos],['#','=','&','[',']']) then begin + AResult := line[linePos]; + Inc(linePos); + exit(True); + end;} + if (line[linePos] <> '\') then begin + AResult := line[linePos]; + Inc(linePos); + exit(True); + end; + CheckLineLength(linePos+1); + Inc(linePos); + case line[linePos] of + '''': begin + AResult := '\'; + exit(True); + end; + {'\' : begin + AResult := '\'; + exit(True); + end;} + 'u' : begin + CheckLineLength(linePos+4); + AResult := '$'+Copy(line,(linePos+1),4); + if not TryStrToInt(AResult,k) then + raise Exception.CreateFmt('Hexadecimal Integer expected but found "%s", line = "%s".',[AResult,CurrentLine()]); + SetLength(us,1); + us[1] := UnicodeChar(k); + AResult := UTF8Encode(us); + Inc(linePos,5); + exit(True); + end; + 'U' : begin + CheckLineLength(linePos+8); + AResult := '$'+Copy(line,(linePos+1),8); + if not TryStrToInt(AResult,k) then + raise Exception.CreateFmt('Hexadecimal Integer expected but found "%s".',[AResult]); + if (k > High(Word)) then begin + SetLength(us,2); + FromUCS4(k,us[1],us[2]); + if (Ord(us[2]) = 0) then + SetLength(us,1); + end else begin + SetLength(us,1); + us[1] := UnicodeChar(k); + end; + AResult := UTF8Encode(us); + Inc(linePos,9); + exit(True); + end; + else + raise Exception.CreateFmt('Invalide escaped string "%s", at %d position.',[CurrentLine(),linePos]); + end; + end; + + function ReadQuotedString() : UTF8String; + var + ks : UTF8String; + begin + if (line[linePos] <> '''') then + raise Exception.CreateFmt('Unexpected character found "%s", a quote expected: "%s".',[line[linePos],CurrentLine()]); + Inc(linePos); + if (linePos > lineLength) then + raise Exception.CreateFmt('Unexpected end of line, a quote expected: "%s".',[CurrentLine()]); + if (line[linePos] = '''') then begin + Inc(linePos); + Result := ''''; + exit; + end; + Result := ''; + while (linePos <= lineLength) and ReadChar(ks) do begin + Result := Result + ks; + if (line[linePos] = '''') then + break; + end; + if (line[linePos] = '''') then begin + Inc(linePos); + exit; + end; + raise Exception.CreateFmt('Unexpected end of line, a quote expected: "%s".',[line]); + end; + + function ReadUnQuotedString() : UTF8String; + var + k : Integer; + begin + k := linePos; + while (linePos <= lineLength) and + not(CharInSet(line[linePos],[' ',#9,'#', '=','&','[',']','<','>','''','/','|'])) + do begin + Inc(linePos); + end; + if (linePos > k) then begin + if (line[linePos] in [' ',#9,'#', '=','&','[',']','<','>','''','/','|']) then + Result := Copy(line,k,(linePos-k)) + else + Result := Copy(line,k,(linePos-k)); //Result := Copy(line,k,(linePos-k+1)); + end else begin + Result := ''; + end; + end; + + function NextToken() : UTF8String; overload; + var + k : Integer; + ks : UTF8String; + begin + specialChararter := False; + while True do begin + while (linePos <= lineLength) and CharInSet(line[linePos],[' ', #9, #13]) do begin + Inc(linePos); + end; + if (linePos > lineLength) or (line[linePos] = '#') then begin + if not NextLine() then + exit(''); + Continue; + end ; + Break; + end; + if (linePos > lineLength) then + exit(''); + + if (line[linePos] = '*') then begin + linePos := linePos+1; + specialChararter := True; + exit('*'); + end; + k := linePos; + if (linePos <= lineLength) and CharInSet(line[linePos],['<','>']) then begin + ks := line[linePos]; + while (linePos <= lineLength) and (line[linePos] = ks) do begin + Inc(linePos); + end; + Result := Copy(line,k,(linePos-k)); + exit; + end; + if (linePos <= lineLength) and + CharInSet(line[linePos],['=','&','[',']','<','>','/','|']) + then begin + Inc(linePos); + Result := Copy(line,k,(linePos-k)); + specialChararter := True; + exit; + end; + {if (line[linePos] = '''') then + exit(ReadQuotedString()); } + Result := ''; + while (linePos <= lineLength) do begin + if CharInSet(line[linePos],[' ',#9,#13,'#', '=','&','[',']','<','>','/','|']) then + Break; + if (line[linePos] <> '''') then + ks := ReadUnQuotedString() + else + ks := ReadQuotedString(); + if (ks = '') then + Break; + Result := Result + ks; + end; + end; + + function NextToken(const AMustSucceed : Boolean) : UTF8String; overload; + begin + Result := NextToken(); + if (Result = '') and AMustSucceed then + raise Exception.CreateFmt('Unexpected end of line(%d) : "%s".',[lineIndex,CurrentLine()]); + end; + + procedure CheckToken(const AActual, AExpectedToken : UTF8String); + begin + if (AActual <> AExpectedToken) then + raise Exception.CreateFmt( + '"%s" expected but "%s" found at position %d, BufferPosition(%d), line(%d) = "%s".', + [AExpectedToken,AActual,linePos,bufferPos,lineIndex,CurrentLine()] + ); + end; + + function parse_reset() : Boolean; + var + s, s1 : UTF8String; + logicalPos : TReorderLogicalReset; + k : Integer; + begin + s := NextToken(); + if (s = '') then + exit(False); + CheckToken(s,'&'); + s := NextToken(True); + if (s = '[') then begin + s := NextToken(); + if (s = s_BEFORE) then begin + s := NextToken(); + if not(TryStrToInt(s,k)) or (k < 1) or (k > 3) then + CheckToken(s,'"1" or "2" or "3"'); + CheckToken(NextToken(True),']'); + statement^.Reset := String2UnicodeCodePointArray(NextToken(True)); + statement^.Before := True; + end else begin + while True do begin + s1 := NextToken(); + if (s1 = '') or (s1 = ']') then + break; + s := s + Trim(s1) + end; + CheckToken(s1,']'); + if (s = '') then + raise Exception.CreateFmt('Unexpected end of line : "%s".',[CurrentLine()]); + if not TryStrToLogicalReorder(s,logicalPos) then + raise Exception.CreateFmt(sUnknownResetLogicalPosition,[s]); + statement^.LogicalPosition := logicalPos; + end; + end else begin + statement^.Reset := String2UnicodeCodePointArray(s); + end; + if (statement^.LogicalPosition = TReorderLogicalReset.None) and + (Length(statement^.Reset) = 0) + then + raise Exception.Create(sInvalidResetClause); + Result := True; + end; + + procedure EnsureElementLength(const ALength : Integer); + var + k, d : Integer; + begin + k := Length(statement^.Elements); + if (k < ALength) then begin + k := ALength; + if (k = 0) then begin + k := 50; + end else begin + if (k < 10) then + d := 10 + else + d := 2; + k := k * d; + end; + statement^.SetElementCount(k); + end; + end; + + procedure AddElement( + const AChars : array of UCS4Char; + const AWeigthKind : TReorderWeigthKind; + const AContext : UTF8String + );overload; + var + kp : PReorderUnit; + kc, k : Integer; + begin + EnsureElementLength(elementActualCount+1); + kp := @statement^.Elements[elementActualCount]; + kc := Length(AChars)-1; + if (kc < 0) then + kc := 0; + SetLength(kp^.Characters,kc); + for k := 0 to kc - 1 do + kp^.Characters[k] := AChars[k]; + kp^.WeigthKind := AWeigthKind; + elementActualCount := elementActualCount + 1; + if (AContext <> '') then + kp^.Context := String2UnicodeCodePointArray(AContext); + end; + + procedure AddElement( + const AChar : UCS4Char; + const AWeigthKind : TReorderWeigthKind; + const AContext : UTF8String + );overload; + var + kp : PReorderUnit; + kc, k : Integer; + begin + EnsureElementLength(elementActualCount+1); + kp := @statement^.Elements[elementActualCount]; + SetLength(kp^.Characters,1); + kp^.Characters[0] := AChar; + kp^.WeigthKind := AWeigthKind; + elementActualCount := elementActualCount + 1; + if (AContext <> '') then + kp^.Context := String2UnicodeCodePointArray(AContext); + end; + + function ReadNextItem() : Boolean; + var + contextStr : UTF8String; + w : TReorderWeigthKind; + last : PReorderUnit; + u4str : UCS4String; + s, ts : UTF8String; + expandStr : TUnicodeCodePointArray; + k, kc, x : Integer; + us : UnicodeString; + begin + contextStr := ''; + expandStr := nil; + Result := False; + SaveState(); + s := NextToken(); + if (s = '') then begin + DiscardState(); + exit; + end; + if specialChararter and (s = '&') then begin + RestoreState(); + exit; + end; + DiscardState(); + if not TryStringToReorderWeigthKind(s,w) then + CheckToken(s,'Reorder Weigth'); + s := NextToken(True); + if specialChararter then begin + if (s = '[') then begin + k := 1; + while True do begin + ts := NextToken(True); + s := s + ts; + if specialChararter then begin + if (ts = '[') then + k := k+1 + else if (ts = ']') then begin + k := k-1; + if (k = 0) then + Break; + end; + end; + end; + if (Pos('variable',s) > 0) then + exit(True); + end else if (s = '*') then begin + s := NextToken(True); + us := UTF8Decode(s); + u4str := UnicodeStringToUCS4String(us); + kc := Length(u4str)-1; + k := 0; + while (k <= (kc-1)) do begin + if (k > 0) and (u4str[k] = Ord('-')) then begin + if (k = (kc-1)) then begin + AddElement(u4str[k],w,contextStr); + end else begin + for x := (u4str[k-1]+1) to u4str[k+1] do + AddElement(x,w,contextStr); + k := k+1; + end; + end else begin + AddElement(u4str[k],w,contextStr); + end; + k := k+1; + end; + exit(True); + end; + end; + SaveState(); + ts := NextToken(); + if (ts = '') or not(specialChararter) then begin + RestoreState(); + us := UTF8Decode(s); + u4str := UnicodeStringToUCS4String(us); + end else begin + if (ts = '|') then begin + DiscardState(); + contextStr := s; + s := NextToken(True); + SaveState(); + ts := NextToken(); + end; + if specialChararter and (ts = '/') then begin + expandStr := String2UnicodeCodePointArray(NextToken(True)); + DiscardState(); + end else begin + RestoreState(); + end; + u4str := UnicodeStringToUCS4String(UTF8Decode(s)); + end; + AddElement(u4str,w,contextStr); + if (Length(expandStr) > 0) then begin + last := @statement^.Elements[elementActualCount-1]; + last^.ExpansionChars := expandStr; + end; + Result := True; + end; + +begin + Result := False; + elementActualCount := 0; + if (AStartPosition >= AMaxLen) then + exit; + historyItemIndex := -1; + lineIndex := ALineCount; + bufferLength := AMaxLen; + bufferPos := AStartPosition; + p := AData+AStartPosition; + SetLength(line,LINE_LENGTH); + statement := AStatement; + statement^.Clear(); + if not NextLine() then + exit; + if not parse_reset() then + exit; + while ReadNextItem() do begin + // All done in the condition + end; + statement^.SetElementCount(elementActualCount); + if (linePos > lineLength) then + linePos := lineLength; + ANextPos := bufferPos-lineLength+linePos; + Result := (ANextPos > AStartPosition); + ALineCount := lineIndex; +end; + +procedure ParseInitialDocument(ASequence : POrderedCharacters; ADoc : TCustomMemoryStream); +var + buffer : PAnsiChar; + bufferLength : Integer; + i, nextPost : Integer; + statement : TReorderSequence; + p : PReorderUnit; + lineCount : Integer; +begin + if (ADoc.Size < 1) then + exit; + buffer := ADoc.Memory; //0xEF,0xBB,0xBF + bufferLength := ADoc.Size; + if (bufferLength >= 3) and + (Byte(buffer[0]) = $EF) and + (Byte(buffer[1]) = $BB) and + (Byte(buffer[2]) = $BF) + then begin + Inc(buffer,3); + Dec(bufferLength,3); + end; + lineCount := 0; + ASequence^.Clear(); + SetLength(ASequence^.Data,50000); + nextPost := 0; + i := 0; + while (i < bufferLength) do begin + statement.Clear(); + if not ParseStatement(buffer,i,bufferLength,@statement,nextPost,lineCount) then + Break; + i := nextPost; + try + ASequence^.ApplyStatement(@statement); + except + on e : Exception do begin + e.Message := Format('%s Position = %d',[e.Message,i]); + raise; + end; + end; + end; + if (ASequence^.ActualLength > 0) then begin + p := @ASequence^.Data[0]; + for i := 0 to ASequence^.ActualLength - 1 do begin + p^.Changed := False; + Inc(p); + end; + end; +end; + +procedure ParseInitialDocument(ASequence : POrderedCharacters; AFileName : string); +var + doc : TMemoryStream; +begin + doc := TMemoryStream.Create(); + try + doc.LoadFromFile(AFileName); + doc.Position := 0; + ParseInitialDocument(ASequence,doc); + finally + doc.Free(); + end; +end; + + +end. + diff --git a/utils/unicode/cldrxml.pas b/utils/unicode/cldrxml.pas index 49d57d7f08..f14c7f4ed1 100644 --- a/utils/unicode/cldrxml.pas +++ b/utils/unicode/cldrxml.pas @@ -1,6 +1,6 @@ { Parser of the CLDR collation xml files. - Copyright (c) 2013 by Inoussa OUEDRAOGO + Copyright (c) 2013, 2014, 2015 by Inoussa OUEDRAOGO The source code is distributed under the Library GNU General Public License with the following modification: @@ -19,6 +19,12 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. } +{ The procedure whoses names lasted by 'XML' (ParseInitialDocumentXML, + ParseCollationDocumentXML, ...) are for older CLDR versions (CDLR <= 23); The + old version was unsing a XML syntax for collation's rules specifications. + The new versions (and going forward) will be using the text syntax. +} + unit cldrxml; {$mode objfpc}{$H+} @@ -29,49 +35,123 @@ uses Classes, SysUtils, DOM, cldrhelper; - procedure ParseInitialDocument(ASequence : POrderedCharacters; ADoc : TDOMDocument);overload; - procedure ParseInitialDocument(ASequence : POrderedCharacters; AFileName : string);overload; +type + + { TCldrCollationFileLoader } + + TCldrCollationFileLoader = class(TInterfacedObject,ICldrCollationLoader) + private + FPath : string; + private + procedure SetPath(APath : string); + function BuildFileName(ALanguage : string) : string; + procedure CheckFile(AFileName : string); + protected + procedure LoadCollation( + const ALanguage : string; + ACollation : TCldrCollation; + AMode : TCldrParserMode + ); + procedure LoadCollationType( + const ALanguage, + ATypeName : string; + AType : TCldrCollationItem + ); + public + constructor Create(APath : string); + end; + + { TCldrCollationStreamLoader } + + TCldrCollationStreamLoader = class(TInterfacedObject,ICldrCollationLoader) + private + FLanguages : array of string; + FStreams : array of TStream; + private + procedure CheckContent(ALanguage : string); + function IndexOf(ALanguage : string) : Integer; + protected + procedure LoadCollation( + const ALanguage : string; + ACollation : TCldrCollation; + AMode : TCldrParserMode + ); + procedure LoadCollationType( + const ALanguage, + ATypeName : string; + AType : TCldrCollationItem + ); + public + constructor Create( + const ALanguages : array of string; + const AStreams : array of TStream + ); + destructor Destroy();override; + end; + + procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; ADoc : TDOMDocument);overload; + procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; AFileName : string);overload; - procedure ParseCollationDocument( + procedure ParseCollationDocumentXML( ADoc : TDOMDocument; ACollation : TCldrCollation; AMode : TCldrParserMode );overload; - procedure ParseCollationDocument( + procedure ParseCollationDocumentXML( + ADoc : TDOMDocument; + ACollation : TCldrCollationItem; + AType : string + );overload; + procedure ParseCollationDocumentXML( const AFileName : string; ACollation : TCldrCollation; AMode : TCldrParserMode );overload; + procedure ParseCollationDocumentXML( + const AFileName : string; + ACollation : TCldrCollationItem; + AType : string + );overload; - procedure ParseCollationDocument( + //----------------------------------------------------- + procedure ParseCollationDocument2( + ADoc : TDOMDocument; + ACollation : TCldrCollation; + AMode : TCldrParserMode + );overload; + procedure ParseCollationDocument2( + const AFileName : string; + ACollation : TCldrCollation; + AMode : TCldrParserMode + );overload; + procedure ParseCollationDocument2( + AStream : TStream; + ACollation : TCldrCollation; + AMode : TCldrParserMode + );overload; + + procedure ParseCollationDocument2( const AFileName : string; ACollation : TCldrCollationItem; AType : string );overload; - procedure ParseCollationDocument( + procedure ParseCollationDocument2( ADoc : TDOMDocument; ACollation : TCldrCollationItem; AType : string );overload; - -resourcestring - sCaseNothandled = 'This case is not handled : "%s", Position = %d.'; - sCodePointExpected = 'Code Point node expected as child at this position "%d".'; - sCollationsNodeNotFound = '"collations" node not found.'; - sCollationTypeNotFound = 'collation "Type" not found : "%s".'; - sHexAttributeExpected = '"hex" attribute expected at this position "%d".'; - sInvalidResetClause = 'Invalid "Reset" clause.'; - sNodeNameAssertMessage = 'Expected NodeName "%s", got "%s".'; - sRulesNodeNotFound = '"rules" node not found.'; - sTextNodeChildExpected = '(Child) text node expected at this position "%d", but got "%s".'; - sUniqueChildNodeExpected = 'Unique child node expected at this position "%d".'; - sUnknownResetLogicalPosition = 'Unknown reset logical position : "%s".'; + procedure ParseCollationDocument2( + AStream : TStream; + ACollation : TCldrCollationItem; + AType : string + );overload; implementation uses - typinfo, XMLRead, XPath, Helper, unicodeset; + typinfo, RtlConsts, XMLRead, XPath, Helper, unicodeset, cldrtxt; const + s_ALT = 'alt'; s_AT = 'at'; //s_BEFORE = 'before'; s_CODEPOINT = 'codepoint'; @@ -81,12 +161,16 @@ const //s_DEFAULT = 'default'; s_EXTEND = 'extend'; s_HEX = 'hex'; + s_IMPORT = 'import'; s_POSITION = 'position'; s_RESET = 'reset'; s_RULES = 'rules'; + s_SOURCE = 'source'; //s_STANDART = 'standard'; s_TYPE = 'type'; + s_CR = 'cr'; + procedure CheckNodeName(ANode : TDOMNode; const AExpectedName : DOMString); begin if (ANode.NodeName <> AExpectedName) then @@ -124,23 +208,7 @@ begin end; end; -function TryStrToLogicalReorder( - const AValue : string; - out AResult : TReorderLogicalReset -) : Boolean; -var - s : string; - i : Integer; -begin - s := StringReplace(AValue,' ','',[rfReplaceAll]); - s := StringReplace(s,'_','',[rfReplaceAll]); - i := GetEnumValue(TypeInfo(TReorderLogicalReset),s); - Result := (i > -1); - if Result then - AResult := TReorderLogicalReset(i); -end; - -function ParseStatement( +function ParseStatementXML( ARules : TDOMElement; AStartPosition : Integer; AStatement : PReorderSequence; @@ -393,7 +461,7 @@ begin ANextPos := i; end; -procedure ParseInitialDocument(ASequence : POrderedCharacters; ADoc : TDOMDocument); +procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; ADoc : TDOMDocument); var n : TDOMNode; rulesElement : TDOMElement; @@ -412,7 +480,7 @@ begin i := 0; while (i < c) do begin statement.Clear(); - if not ParseStatement(rulesElement,i,@statement,nextPost) then + if not ParseStatementXML(rulesElement,i,@statement,nextPost) then Break; i := nextPost; try @@ -433,13 +501,13 @@ begin end; end; -procedure ParseInitialDocument(ASequence : POrderedCharacters; AFileName : string); +procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; AFileName : string); var doc : TXMLDocument; begin ReadXMLFile(doc,AFileName); try - ParseInitialDocument(ASequence,doc); + ParseInitialDocumentXML(ASequence,doc); finally doc.Free(); end; @@ -500,10 +568,10 @@ begin it.Free(); uset.Free(); end; - SetLength(r,0); + r := nil; end; -procedure ParseCollationItem( +procedure ParseCollationItemXML( ACollationNode : TDOMElement; AItem : TCldrCollationItem; AMode : TCldrParserMode @@ -544,7 +612,105 @@ begin i := 0; while (i < c) do begin statement^.Clear(); - if not ParseStatement(rulesElement,i,statement,nextPos) then + if not ParseStatementXML(rulesElement,i,statement,nextPos) then + Break; + i := nextPos; + Inc(statement); + Inc(sal); + if (sal >= Length(statementList)) then begin + SetLength(statementList,(sal*2)); + statement := @statementList[(sal-1)]; + end; + end; + end; + SetLength(statementList,sal); + AItem.Rules := statementList; + end; +end; + +procedure ParseImports(ACollationNode : TDOMElement; AItem : TCldrCollationItem); +var + locList : TXPathVariable; + i : Integer; + nd, locAtt : TDOMNode; + locSource, locType : string; +begin + locList := EvaluateXPathExpression(s_IMPORT,ACollationNode); + try + if not locList.InheritsFrom(TXPathNodeSetVariable) then + exit; + for i := 0 to locList.AsNodeSet.Count-1 do begin + nd := TDOMNode(locList.AsNodeSet[i]); + if (nd.Attributes <> nil) then begin + locSource := ''; + locType := ''; + locAtt := nd.Attributes.GetNamedItem(s_SOURCE); + if (locAtt <> nil) then + locSource := locAtt.NodeValue; + locAtt := nd.Attributes.GetNamedItem(s_TYPE); + if (locAtt <> nil) then + locType := locAtt.NodeValue; + end; + if (locType <> '') then + AItem.Imports.Add(locSource,locType); + end; + finally + locList.Free(); + end; +end; + +procedure ParseCollationItem2( + ACollationNode : TDOMElement; + AItem : TCldrCollationItem; + AMode : TCldrParserMode +); +var + n : TDOMNode; + rulesElement : TDOMCDATASection; + i, c, nextPos : Integer; + statementList : TReorderSequenceArray; + sal : Integer;//statement actual length + statement : PReorderSequence; + s : DOMString; + u8 : UTF8String; + buffer : PAnsiChar; + lineCount : Integer; +begin + AItem.TypeName := ACollationNode.GetAttribute(s_TYPE); + AItem.Alt := ACollationNode.GetAttribute(s_ALT); + AItem.Base := EvaluateXPathStr('base',ACollationNode); + AItem.Backwards := (EvaluateXPathStr('settings/@backwards',ACollationNode) = 'on'); + if AItem.Backwards then + AItem.ChangedFields := AItem.ChangedFields + [TCollationField.BackWard]; + ParseImports(ACollationNode,AItem); + AItem.Rules := nil; + if (AMode = TCldrParserMode.FullParsing) then begin + SetLength(statementList,15); + sal := 0; + statement := @statementList[0]; + s := EvaluateXPathStr('suppress_contractions',ACollationNode); + if (s <> '') then begin + if (ParseDeletion(s,statement) > 0) then begin + Inc(sal); + Inc(statement); + end else begin + statement^.Clear(); + end; + end; + n := ACollationNode.FindNode(s_CR); + if (n <> nil) then begin + n := (n as TDOMElement).FirstChild; + rulesElement := n as TDOMCDATASection; + s := rulesElement.Data; + u8 := UTF8Encode(s); + c := Length(u8); + buffer := @u8[1]; + nextPos := 0; + i := 0; + lineCount := 0; + while (i < c) do begin + statement^.Clear(); + if not ParseStatement(buffer,i,c,statement,nextPos,lineCount) then Break; i := nextPos; Inc(statement); @@ -560,7 +726,7 @@ begin end; end; -procedure ParseCollationDocument( +procedure ParseCollationDocumentXML( ADoc : TDOMDocument; ACollation : TCldrCollation; AMode : TCldrParserMode @@ -577,6 +743,7 @@ begin raise Exception.Create(sCollationsNodeNotFound); collationsElement := n as TDOMElement; ACollation.Clear(); + ACollation.Mode := AMode; ACollation.Language := EvaluateXPathStr('identity/language/@type',ADoc.DocumentElement); ACollation.Version := EvaluateXPathStr('identity/version/@number',ADoc.DocumentElement); ACollation.DefaultType := EvaluateXPathStr('collations/default/@type',ADoc.DocumentElement); @@ -589,7 +756,7 @@ begin n := nl[i]; if (n.NodeName = s_COLLATION) then begin item := TCldrCollationItem.Create(); - ParseCollationItem((n as TDOMElement),item,AMode); + ParseCollationItemXML((n as TDOMElement),item,AMode); ACollation.Add(item); item := nil; end @@ -601,7 +768,7 @@ begin end; end; -procedure ParseCollationDocument( +procedure ParseCollationDocumentXML( ADoc : TDOMDocument; ACollation : TCldrCollationItem; AType : string @@ -614,7 +781,68 @@ begin if (xv.AsNodeSet.Count = 0) then raise Exception.CreateFmt(sCollationTypeNotFound,[AType]); ACollation.Clear(); - ParseCollationItem((TDOMNode(xv.AsNodeSet[0]) as TDOMElement),ACollation,TCldrParserMode.FullParsing); + ParseCollationItemXML((TDOMNode(xv.AsNodeSet[0]) as TDOMElement),ACollation,TCldrParserMode.FullParsing); + finally + xv.Free(); + end +end; + +procedure ParseCollationDocument2( + ADoc : TDOMDocument; + ACollation : TCldrCollation; + AMode : TCldrParserMode +); +var + n : TDOMNode; + collationsElement : TDOMElement; + i, c : Integer; + item : TCldrCollationItem; + nl : TDOMNodeList; +begin + n := ADoc.DocumentElement.FindNode(s_COLLATIONS); + if (n = nil) then + raise Exception.Create(sCollationsNodeNotFound); + collationsElement := n as TDOMElement; + ACollation.Clear(); + ACollation.Mode := AMode; + ACollation.Language := EvaluateXPathStr('identity/language/@type',ADoc.DocumentElement); + ACollation.Version := EvaluateXPathStr('identity/version/@number',ADoc.DocumentElement); + ACollation.DefaultType := EvaluateXPathStr('collations/defaultCollation',ADoc.DocumentElement); + if collationsElement.HasChildNodes() then begin + nl := collationsElement.ChildNodes; + c := nl.Count; + item := nil; + try + for i := 0 to c - 1 do begin + n := nl[i]; + if (n.NodeName = s_COLLATION) then begin + item := TCldrCollationItem.Create(); + ParseCollationItem2((n as TDOMElement),item,AMode); + ACollation.Add(item); + item := nil; + end + end; + except + FreeAndNil(item); + raise; + end; + end; +end; + +procedure ParseCollationDocument2( + ADoc : TDOMDocument; + ACollation : TCldrCollationItem; + AType : string +); +var + xv : TXPathVariable; +begin + xv := EvaluateXPathExpression(Format('collations/collation[@type=%s]',[QuotedStr(AType)]),ADoc.DocumentElement); + try + if (xv.AsNodeSet.Count = 0) then + raise Exception.CreateFmt(sCollationTypeNotFound,[AType]); + ACollation.Clear(); + ParseCollationItem2((TDOMNode(xv.AsNodeSet[0]) as TDOMElement),ACollation,TCldrParserMode.FullParsing); finally xv.Free(); end @@ -650,7 +878,40 @@ begin end; end; -procedure ParseCollationDocument( +procedure ParseCollationDocumentXML( + const AFileName : string; + ACollation : TCldrCollation; + AMode : TCldrParserMode +); +var + doc : TXMLDocument; +begin + doc := ReadXMLFile(AFileName); + try + ParseCollationDocumentXML(doc,ACollation,AMode); + ACollation.LocalID := ExtractFileName(ChangeFileExt(AFileName,'')); + finally + doc.Free(); + end; +end; + +procedure ParseCollationDocumentXML( + const AFileName : string; + ACollation : TCldrCollationItem; + AType : string +); +var + doc : TXMLDocument; +begin + doc := ReadXMLFile(AFileName); + try + ParseCollationDocumentXML(doc,ACollation,AType); + finally + doc.Free(); + end; +end; + +procedure ParseCollationDocument2( const AFileName : string; ACollation : TCldrCollation; AMode : TCldrParserMode @@ -660,14 +921,30 @@ var begin doc := ReadXMLFile(AFileName); try - ParseCollationDocument(doc,ACollation,AMode); + ParseCollationDocument2(doc,ACollation,AMode); ACollation.LocalID := ExtractFileName(ChangeFileExt(AFileName,'')); finally doc.Free(); end; end; -procedure ParseCollationDocument( +procedure ParseCollationDocument2( + AStream : TStream; + ACollation : TCldrCollation; + AMode : TCldrParserMode +); +var + doc : TXMLDocument; +begin + doc := ReadXMLFile(AStream); + try + ParseCollationDocument2(doc,ACollation,AMode); + finally + doc.Free(); + end; +end; + +procedure ParseCollationDocument2( const AFileName : string; ACollation : TCldrCollationItem; AType : string @@ -677,10 +954,165 @@ var begin doc := ReadXMLFile(AFileName); try - ParseCollationDocument(doc,ACollation,AType); + ParseCollationDocument2(doc,ACollation,AType); finally doc.Free(); end; end; +procedure ParseCollationDocument2( + AStream : TStream; + ACollation : TCldrCollationItem; + AType : string +); +var + doc : TXMLDocument; +begin + doc := ReadXMLFile(AStream); + try + ParseCollationDocument2(doc,ACollation,AType); + finally + doc.Free(); + end; +end; + +{ TCldrCollationStreamLoader } + +procedure TCldrCollationStreamLoader.CheckContent(ALanguage: string); +begin + if not FileExists(ALanguage) then + raise EFOpenError.CreateFmt(SFOpenError,[ALanguage]); +end; + +function TCldrCollationStreamLoader.IndexOf(ALanguage: string): Integer; +var + i : Integer; +begin + for i := Low(FLanguages) to High(FLanguages) do begin + if (FLanguages[i] = ALanguage) then begin + Result := i; + exit; + end; + end; + Result := -1; +end; + +procedure TCldrCollationStreamLoader.LoadCollation( + const ALanguage : string; + ACollation : TCldrCollation; + AMode : TCldrParserMode +); +var + i : Integer; + locStream : TStream; +begin + i := IndexOf(ALanguage); + if (i < 0) then + CheckContent(ALanguage); + locStream := FStreams[i]; + locStream.Position := 0; + ParseCollationDocument2(locStream,ACollation,AMode); +end; + +procedure TCldrCollationStreamLoader.LoadCollationType( + const ALanguage, + ATypeName : string; + AType : TCldrCollationItem +); +var + i : Integer; + locStream : TStream; +begin + i := IndexOf(ALanguage); + if (i < 0) then + CheckContent(ALanguage); + locStream := FStreams[i]; + locStream.Position := 0; + ParseCollationDocument2(locStream,AType,ATypeName); +end; + +constructor TCldrCollationStreamLoader.Create( + const ALanguages : array of string; + const AStreams : array of TStream +); +var + c, i : Integer; +begin + c := Length(ALanguages); + if (Length(AStreams) < c) then + c := Length(AStreams); + SetLength(FLanguages,c); + SetLength(FStreams,c); + for i := Low(ALanguages) to High(ALanguages) do begin + FLanguages[i] := ALanguages[i]; + FStreams[i] := AStreams[i]; + end; +end; + +destructor TCldrCollationStreamLoader.Destroy(); +var + i : Integer; +begin + for i := Low(FStreams) to High(FStreams) do + FreeAndNil(FStreams[i]); +end; + +{ TCldrCollationFileLoader } + +procedure TCldrCollationFileLoader.SetPath(APath: string); +var + s : string; +begin + if (APath = '') then + s := '' + else + s := IncludeLeadingPathDelimiter(APath); + if (s <> FPath) then + FPath := s; +end; + +function TCldrCollationFileLoader.BuildFileName(ALanguage: string): string; +begin + Result := Format('%s%s.xml',[FPath,ALanguage]); +end; + +procedure TCldrCollationFileLoader.CheckFile(AFileName: string); +begin + if not FileExists(AFileName) then + raise EFOpenError.CreateFmt(SFOpenError,[AFileName]); +end; + +procedure TCldrCollationFileLoader.LoadCollation( + const ALanguage : string; + ACollation : TCldrCollation; + AMode : TCldrParserMode +); +var + locFileName : string; +begin + locFileName := BuildFileName(ALanguage); + CheckFile(locFileName); + ACollation.Clear(); + ParseCollationDocument2(locFileName,ACollation,AMode); +end; + +procedure TCldrCollationFileLoader.LoadCollationType( + const ALanguage, + ATypeName : string; + AType : TCldrCollationItem +); +var + locFileName : string; +begin + locFileName := BuildFileName(ALanguage); + CheckFile(locFileName); + AType.Clear(); + ParseCollationDocument2(locFileName,AType,ATypeName); +end; + +constructor TCldrCollationFileLoader.Create(APath: string); +begin + SetPath(APath); +end; + end. diff --git a/utils/unicode/grbtree.pas b/utils/unicode/grbtree.pas index b493619a43..259fbad54b 100644 --- a/utils/unicode/grbtree.pas +++ b/utils/unicode/grbtree.pas @@ -419,8 +419,10 @@ begin end; // Stop if found - if (cp.Compare(q^.Data,AData) = 0) then + if (cp.Compare(q^.Data,AData) = 0) then begin + Result := q; break; + end; last := dir; dir := (cp.Compare(q^.Data,AData) < 0); diff --git a/utils/unicode/helper.pas b/utils/unicode/helper.pas index 1e10910f5d..b0a257af96 100644 --- a/utils/unicode/helper.pas +++ b/utils/unicode/helper.pas @@ -1,6 +1,6 @@ { Unicode parser helper unit. - Copyright (c) 2012 by Inoussa OUEDRAOGO + Copyright (c) 2012-2015 by Inoussa OUEDRAOGO The source code is distributed under the Library GNU General Public License with the following modification: @@ -66,6 +66,7 @@ const ' but WITHOUT ANY WARRANTY; without even the implied warranty of ' + sLineBreak + ' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }'; + WEIGHT_LEVEL_COUNT = 3; type // Unicode General Category @@ -680,6 +681,14 @@ type procedure ReverseBytes(var AData; const ALength : Integer); procedure ReverseArray(var AValue; const AArrayLength, AItemSize : PtrInt); + function CalcMaxLevel2Value(ALines : array of TUCA_LineRec) : Cardinal; + procedure RewriteLevel2Values(ALines : PUCA_LineRec; ALength : Integer); + function RewriteLevel2( + const ALevel1Value : Cardinal; + ALines : PUCA_LineRec; + const ALinesLength : Integer + ) : Integer; + resourcestring SInsufficientMemoryBuffer = 'Insufficient Memory Buffer'; @@ -1728,6 +1737,8 @@ var Inc(actualPropLen); end; locData.PropID := k; + if (actualDataLen >= Length(ADataLineList)) then + SetLength(ADataLineList,(2*Length(ADataLineList))); ADataLineList[actualDataLen] := locData; Inc(actualDataLen); end; @@ -2477,7 +2488,7 @@ var a := LowerCase(Trim(AToken)); b := LowerCase(Trim(NextToken())); if (a <> b) then - raise Exception.CreateFmt('Expected token "%s" but found "%s".',[a,b]); + raise Exception.CreateFmt('Expected token "%s" but found "%s", Line = "%s".',[a,b,line]); end; function ReadWeightBlock(var ADest : TUCA_WeightRec) : Boolean; @@ -2498,7 +2509,7 @@ var ADest.Variable := True; end; ADest.Weights[0] := StrToInt('$'+NextToken()); - for k := 1 to 3 do begin + for k := 1 to WEIGHT_LEVEL_COUNT-1 do begin CheckToken('.'); ADest.Weights[k] := StrToInt('$'+NextToken()); end; @@ -2638,8 +2649,11 @@ begin exit(-1); end; -Procedure QuickSort(var AList: TUCA_DataBookIndex; L, R : Longint; - ABook : PUCA_DataBook); +procedure QuickSort( + var AList : TUCA_DataBookIndex; + L, R : Longint; + ABook : PUCA_DataBook +);overload; var I, J : Longint; P, Q : Integer; @@ -4673,6 +4687,201 @@ begin end; end; +Procedure QuickSort(AList : PCardinal; L, R : Longint);overload; +var + I, J : Longint; + P, Q : Cardinal; +begin + repeat + I := L; + J := R; + P := AList[ (L + R) div 2 ]; + repeat + while (P > AList[i]) do + I := I + 1; + while (P < AList[J]) do + J := J - 1; + If I <= J then + begin + Q := AList[I]; + AList[I] := AList[J]; + AList[J] := Q; + I := I + 1; + J := J - 1; + end; + until I > J; + if J - L < R - I then + begin + if L < J then + QuickSort(AList, L, J); + L := I; + end + else + begin + if I < R then + QuickSort(AList, I, R); + R := J; + end; + until L >= R; +end; + +function CalcMaxLevel2Count( + const ALevel1Value : Cardinal; + ALines : array of TUCA_LineRec +) : Integer; +var + i, c, k : Integer; + ac : Integer; + items : array of Cardinal; + p : PUCA_LineRec; + pw : ^TUCA_WeightRec; +begin + c := Length(ALines); + if (c < 1) then + exit(0); + SetLength(items,0); + ac := 0; + p := @ALines[Low(ALines)]; + for i := 0 to c-1 do begin + if (Length(p^.Weights) > 0) then begin + pw := @p^.Weights[Low(p^.Weights)]; + for k := 0 to Length(p^.Weights)-1 do begin + if (pw^.Weights[0] = ALevel1Value) then begin + if (ac = 0) or (IndexDWord(items[0],ac,pw^.Weights[1]) < 0) then begin + if (ac >= Length(items)) then + SetLength(items,Length(items)+256); + items[ac] := pw^.Weights[1]; + ac := ac+1; + end; + end; + Inc(pw); + end; + end; + Inc(p); + end; + Result := ac; +end; + +function RewriteLevel2( + const ALevel1Value : Cardinal; + ALines : PUCA_LineRec; + const ALinesLength : Integer +) : Integer; +var + i, c, k : Integer; + ac : Integer; + items : array of Cardinal; + p : PUCA_LineRec; + pw : ^TUCA_WeightRec; + newValue : Cardinal; +begin + c := ALinesLength; + if (c < 1) then + exit(0); + SetLength(items,256); + ac := 0; + p := ALines; + for i := 0 to c-1 do begin + if (Length(p^.Weights) > 0) then begin + for k := 0 to Length(p^.Weights)-1 do begin + pw := @p^.Weights[k]; + if (pw^.Weights[0] = ALevel1Value) then begin + if (ac = 0) or (IndexDWord(items[0],ac,pw^.Weights[1]) < 0) then begin + if (ac >= Length(items)) then + SetLength(items,Length(items)+256); + items[ac] := pw^.Weights[1]; + ac := ac+1; + end; + end; + end; + end; + Inc(p); + end; + SetLength(items,ac); + if (ac > 1) then + QuickSort(@items[0],0,(ac-1)); + + p := ALines; + for i := 0 to c-1 do begin + if (Length(p^.Weights) > 0) then begin + for k := 0 to Length(p^.Weights)-1 do begin + pw := @p^.Weights[k]; + if (pw^.Weights[0] = ALevel1Value) then begin + newValue := IndexDWord(items[0],ac,pw^.Weights[1]); + if (newValue < 0) then + raise Exception.CreateFmt('level 2 value %d missed in rewrite of level 1 value of %d.',[pw^.Weights[1],ALevel1Value]); + pw^.Weights[1] := newValue;//+1; + end; + end; + end; + Inc(p); + end; + if (Length(items) > 0) then + Result := items[Length(items)-1] + else + Result := 0; +end; + +procedure RewriteLevel2Values(ALines : PUCA_LineRec; ALength : Integer); +var + c, i, ac, k : Integer; + p : PUCA_LineRec; + level1List : array of Cardinal; + pw : ^TUCA_WeightRec; +begin + c := ALength; + if (c < 1) then + exit; + ac := 0; + SetLength(level1List,c); + p := ALines; + for i := 0 to c-1 do begin + if (Length(p^.Weights) > 0) then begin + for k := 0 to Length(p^.Weights)-1 do begin + pw := @p^.Weights[k]; + if (ac = 0) or (IndexDWord(level1List[0],ac,pw^.Weights[0]) < 0) then begin + if (ac >= Length(level1List)) then + SetLength(level1List,ac+1000); + level1List[ac] := pw^.Weights[0]; + RewriteLevel2(level1List[ac],ALines,ALength); + ac := ac+1; + end; + end; + end; + Inc(p); + end; +end; + +function CalcMaxLevel2Value(ALines : array of TUCA_LineRec) : Cardinal; +var + i, c, k, tempValue : Integer; + p : PUCA_LineRec; + maxLevel : Cardinal; + maxValue : Integer; +begin + c := Length(ALines); + if (c < 2) then + exit(0); + maxLevel := 0; + maxValue := CalcMaxLevel2Count(maxLevel,ALines); + p := @ALines[Low(ALines)+1]; + for i := 1 to c-1 do begin + if (Length(p^.Weights) > 0) then begin + for k := 0 to Length(p^.Weights)-1 do begin + if (p^.Weights[k].Weights[0] <> maxLevel) then begin + tempValue := CalcMaxLevel2Count(p^.Weights[k].Weights[0],ALines); + if (tempValue > maxValue) then begin + maxLevel := p^.Weights[k].Weights[0]; + maxValue := tempValue; + end; + end; + end; + end; + Inc(p); + end; + Result := maxValue; +end; + initialization FS := DefaultFormatSettings; FS.DecimalSeparator := '.'; diff --git a/utils/unicode/unicodeset.pas b/utils/unicode/unicodeset.pas index 0be30432f4..974e8748e5 100644 --- a/utils/unicode/unicodeset.pas +++ b/utils/unicode/unicodeset.pas @@ -1,6 +1,6 @@ { UnicodeSet implementation. - Copyright (c) 2013 by Inoussa OUEDRAOGO + Copyright (c) 2013-2015 by Inoussa OUEDRAOGO The source code is distributed under the Library GNU General Public License with the following modification: @@ -36,6 +36,8 @@ type TUnicodeSet = class; + { TPatternParser } + TPatternParser = class private FBufferStr : UnicodeString; @@ -43,6 +45,7 @@ type FBufferLength : Integer; FSet : TUnicodeSet; FPosition : Integer; + FSpecialChar: Boolean; private procedure Error(const AMsg : string; const AArgs : array of const);overload;inline; procedure Error(const AMsg : string);overload;inline; @@ -58,6 +61,7 @@ type function NextChar() : TUnicodeCodePoint; procedure ParseItem(); procedure DoParse(); + property SpecialChar : Boolean read FSpecialChar; public procedure Parse(const APattern : PUnicodeChar; const ALength : Integer);overload; procedure Parse(const APattern : UnicodeString);overload;inline; @@ -73,6 +77,8 @@ type class function Compare(const A, B : TUnicodeCodePointArray) : Integer;static;inline; end; + { TUnicodeSet } + TUnicodeSet = class private type TItem = TUnicodeCodePointArray; @@ -84,18 +90,22 @@ type FParser : TPatternParser; private procedure CreateParser();inline; + function InternalContains(const AString : UnicodeString) : Boolean;overload; public constructor Create(); destructor Destroy;override; procedure Add(AChar : TUnicodeCodePoint);inline;overload; procedure Add(AString : TUnicodeCodePointArray);inline;overload; procedure AddRange(const AStart, AEnd : TUnicodeCodePoint);inline; - procedure AddPattern(const APattern : UnicodeString);inline; + procedure AddPattern(const APattern : UnicodeString);inline;overload; + procedure AddPattern(const APattern : RawByteString);inline;overload; function CreateIterator() : TIterator; function Contains(const AString : array of TUnicodeCodePoint) : Boolean;overload; function Contains(const AChar : TUnicodeCodePoint) : Boolean;inline;overload; function Contains(const AChar : UnicodeChar) : Boolean;inline;overload; function Contains(const AChar : AnsiChar) : Boolean;inline;overload; + function Contains(const AString : UnicodeString) : Boolean;overload; + function Contains(const AString : RawByteString) : Boolean;overload; end; resourcestring @@ -267,7 +277,7 @@ begin CheckEOF(4); s := Copy(FBufferStr,(FPosition+1),4); Inc(FPosition,4); - if not TryStrToInt('$'+s,i) then + if not TryStrToInt(string('$'+s),i) then Error(SExpectedBut,['\uXXXX',s]); cp := i; end; @@ -281,6 +291,7 @@ begin Inc(FPosition); end; end; + FSpecialChar := (cp = Ord('{')) or (cp = Ord('}')); Result := cp; end; @@ -292,7 +303,8 @@ end; procedure TPatternParser.ParseItem(); var cp, lastCp : TUnicodeCodePoint; - charCount : Integer; + charCount, k : Integer; + cpa : TUnicodeCodePointArray; begin SkipSpaces(); Expect('['); @@ -304,13 +316,31 @@ begin cp := NextChar(); if CompareTo(cp,']') then Break; - if CompareTo(cp,'-') then begin - if (charCount = 0) then - Error(SExpectedBut,['<char>','-']); - cp := NextChar(); - FSet.AddRange(lastCp,cp); + if SpecialChar and (cp = Ord('{')) then begin + SetLength(cpa,12); + k := 0; + while True do begin + cp := NextChar(); + if SpecialChar and (cp = Ord('}')) then + break; + if (k >= Length(cpa)) then + SetLength(cpa,(2*k)); + cpa[k] := cp; + k := k+1; + end; + if (k > 0) then begin + SetLength(cpa,k); + FSet.Add(cpa); + end; end else begin - FSet.Add(cp); + if CompareTo(cp,'-') then begin + if (charCount = 0) then + Error(SExpectedBut,['<char>','-']); + cp := NextChar(); + FSet.AddRange(lastCp,cp); + end else begin + FSet.Add(cp); + end; end; Inc(charCount); end; @@ -348,6 +378,22 @@ begin end; end; +function TUnicodeSet.InternalContains(const AString: UnicodeString): Boolean; +var + u4 : UCS4String; + c, i : Integer; + cpa : TUnicodeCodePointArray; +begin + u4 := UnicodeStringToUCS4String(AString); + c := Length(u4)-1; + if (c = 1) then + exit(Contains(u4[0])); + SetLength(cpa,c); + for i := 0 to c-1 do + cpa[i] := u4[i]; + Result := Contains(cpa); +end; + constructor TUnicodeSet.Create; begin FTree := TTree.Create(); @@ -387,6 +433,14 @@ begin FParser.Parse(APattern); end; +procedure TUnicodeSet.AddPattern(const APattern: RawByteString); +var + us : UnicodeString; +begin + us := UnicodeString(APattern); + AddPattern(us); +end; + function TUnicodeSet.CreateIterator() : TIterator; begin Result := FTree.CreateForwardIterator(); @@ -422,5 +476,22 @@ begin Result := Contains(TUnicodeCodePoint(Ord(AChar))); end; +function TUnicodeSet.Contains(const AString: UnicodeString): Boolean; +begin + if (AString = '') then + exit(Contains([])); + if (Length(AString) = 1) then + exit(Contains(AString[1])); + Result := InternalContains(AString); +end; + +function TUnicodeSet.Contains(const AString: RawByteString): Boolean; +var + us : UnicodeString; +begin + us := UnicodeString(AString); + Result := Contains(us); +end; + end. |