summaryrefslogtreecommitdiff
path: root/utils/unicode
diff options
context:
space:
mode:
authorpaul <paul@3ad0048d-3df7-0310-abae-a5850022a9f2>2013-08-19 13:42:11 +0000
committerpaul <paul@3ad0048d-3df7-0310-abae-a5850022a9f2>2013-08-19 13:42:11 +0000
commit31194d24e16feec10a8d3ef7fa0358b94588df3c (patch)
tree81d4c58051bdd044628135f0ce95a8e741f5c1b6 /utils/unicode
parentaaa0bf4ff480c748fa6b070224116ba0b4907a80 (diff)
downloadfpc-31194d24e16feec10a8d3ef7fa0358b94588df3c.tar.gz
rtl, utils: apply patch of Inoussa:
This patch implements collation loading at runtime. This reduces the final executable's size, as the collation data are now stored externally. Note that it requires the external collation files to be shipped and the program to load the collations it needs using the "LoadCollation"/"RegisterCollation" procedure(s). The external collation files are produced by "cldrparser" (while producing the static files). The external file of the root collation "ducet" is produced by "unihelper". It is important to note that these files are endian specific: * collation_*_be.bco for big endian systems * collation_*_le.bco for little endian systems. The root collation should be registered, be it statically by using the "unicodeducet" unit or dynamically by making a call such as RegisterCollation(<collation dir>,'ducet'). It is possible, in the same application, to make use of both static and dynamic collations. git-svn-id: http://svn.freepascal.org/svn/fpc/trunk@25295 3ad0048d-3df7-0310-abae-a5850022a9f2
Diffstat (limited to 'utils/unicode')
-rw-r--r--utils/unicode/cldrhelper.pas67
-rw-r--r--utils/unicode/cldrparser.lpr57
-rw-r--r--utils/unicode/helper.pas115
-rw-r--r--utils/unicode/unihelper.lpr41
4 files changed, 263 insertions, 17 deletions
diff --git a/utils/unicode/cldrhelper.pas b/utils/unicode/cldrhelper.pas
index e5324f6609..a1495244f5 100644
--- a/utils/unicode/cldrhelper.pas
+++ b/utils/unicode/cldrhelper.pas
@@ -208,14 +208,16 @@ type
) : Integer;
function FindCollationDefaultItemName(ACollation : TCldrCollation) : string;
procedure GenerateCdlrCollation(
- ACollation : TCldrCollation;
- AItemName : string;
- AStoreName : string;
+ ACollation : TCldrCollation;
+ AItemName : string;
+ AStoreName : string;
AStream,
ANativeEndianStream,
- AOtherEndianStream : TStream;
- ARootChars : TOrderedCharacters;
- ARootWeigths : TUCA_LineRecArray
+ AOtherEndianStream,
+ ABinaryNativeEndianStream,
+ ABinaryOtherEndianStream : TStream;
+ ARootChars : TOrderedCharacters;
+ ARootWeigths : TUCA_LineRecArray
);
procedure GenerateUCA_CLDR_Head(
@@ -1635,14 +1637,16 @@ begin
end;
procedure GenerateCdlrCollation(
- ACollation : TCldrCollation;
- AItemName : string;
- AStoreName : string;
+ ACollation : TCldrCollation;
+ AItemName : string;
+ AStoreName : string;
AStream,
ANativeEndianStream,
- AOtherEndianStream : TStream;
- ARootChars : TOrderedCharacters;
- ARootWeigths : TUCA_LineRecArray
+ AOtherEndianStream,
+ ABinaryNativeEndianStream,
+ ABinaryOtherEndianStream : TStream;
+ ARootChars : TOrderedCharacters;
+ ARootWeigths : TUCA_LineRecArray
);
procedure AddLine(const ALine : ansistring; ADestStream : TStream);
@@ -1665,6 +1669,8 @@ var
ucaoSecondTable : TucaOBmpSecondTable;
locHasProps : Boolean;
s : string;
+ serializedHeader : TSerializedCollationHeader;
+ e : TCollationField;
begin
locItem := ACollation.Find(AItemName);
if (locItem = nil) then
@@ -1707,6 +1713,43 @@ begin
AddLine('{$endif FPC_LITTLE_ENDIAN}',AStream);
end;
GenerateUCA_CLDR_Registration(AStream,@locUcaBook);
+
+ FillChar(serializedHeader,SizeOf(TSerializedCollationHeader),0);
+ serializedHeader.Base := locItem.Base;
+ serializedHeader.Version := ACollation.Version;
+ serializedHeader.CollationName := ACollation.Language;
+ serializedHeader.VariableWeight := Ord(locUcaBook.VariableWeight);
+ SetBit(serializedHeader.Backwards,0,locUcaBook.Backwards[0]);
+ SetBit(serializedHeader.Backwards,1,locUcaBook.Backwards[1]);
+ SetBit(serializedHeader.Backwards,2,locUcaBook.Backwards[2]);
+ SetBit(serializedHeader.Backwards,3,locUcaBook.Backwards[3]);
+ if locHasProps then begin
+ serializedHeader.BMP_Table1Length := Length(ucaFirstTable);
+ serializedHeader.BMP_Table2Length := Length(TucaBmpSecondTableItem) *
+ (Length(ucaSecondTable) * SizeOf(UInt24));
+ serializedHeader.OBMP_Table1Length := Length(ucaoFirstTable) * SizeOf(Word);
+ serializedHeader.OBMP_Table2Length := Length(TucaOBmpSecondTableItem) *
+ (Length(ucaoSecondTable) * SizeOf(UInt24));
+ serializedHeader.PropCount := locUcaProps^.ItemSize;
+ serializedHeader.VariableLowLimit := locUcaProps^.VariableLowLimit;
+ serializedHeader.VariableHighLimit := locUcaProps^.VariableHighLimit;
+ end else begin
+ serializedHeader.VariableLowLimit := High(Word);
+ serializedHeader.VariableHighLimit := 0;
+ end;
+ serializedHeader.ChangedFields := 0;
+ for e := Low(TCollationField) to High(TCollationField) do begin
+ if (e in locItem.ChangedFields) then
+ SetBit(serializedHeader.ChangedFields,Ord(e),True);
+ end;
+ ABinaryNativeEndianStream.Write(serializedHeader,SizeOf(serializedHeader));
+ ReverseRecordBytes(serializedHeader);
+ ABinaryOtherEndianStream.Write(serializedHeader,SizeOf(serializedHeader));
+ if locHasProps then begin
+ GenerateBinaryUCA_BmpTables(ABinaryNativeEndianStream,ABinaryOtherEndianStream,ucaFirstTable,ucaSecondTable);
+ GenerateBinaryUCA_OBmpTables(ABinaryNativeEndianStream,ABinaryOtherEndianStream,ucaoFirstTable,ucaoSecondTable);
+ GenerateBinaryUCA_PropTable(ABinaryNativeEndianStream,ABinaryOtherEndianStream,locUcaProps);
+ end;
finally
locSequence.Clear();
FreeUcaBook(locUcaProps);
diff --git a/utils/unicode/cldrparser.lpr b/utils/unicode/cldrparser.lpr
index 9fa60926f2..a8d98796c1 100644
--- a/utils/unicode/cldrparser.lpr
+++ b/utils/unicode/cldrparser.lpr
@@ -22,9 +22,10 @@
program cldrparser;
{$mode objfpc}{$H+}
+{ $define WINCE_TEST}
uses
- SysUtils, classes, getopts,
+ SysUtils, classes, getopts,{$ifdef WINCE}StreamIO,{$endif}
cldrhelper, helper, cldrtest, cldrxml, unicodeset;
const
@@ -66,6 +67,12 @@ var
idx, k : Integer;
s : string;
begin
+{$ifdef WINCE_TEST}
+ ADataDir := ExtractFilePath(ParamStr(0))+'data';
+ AOuputDir := ADataDir;
+ ACollationFileName := 'sv.xml';
+ exit(True);
+{$endif WINCE_TEST}
if (ParamCount() = 0) then
exit(False);
Result := True;
@@ -101,14 +108,41 @@ end;
var
orderedChars : TOrderedCharacters;
ucaBook : TUCA_DataBook;
- stream, streamNE, streamOE : TMemoryStream;
+ stream, streamNE, streamOE, binaryStreamNE, binaryStreamOE : TMemoryStream;
s, collationFileName, collationTypeName : string;
i , c: Integer;
collation : TCldrCollation;
dataPath, outputPath : string;
collationItem : TCldrCollationItem;
testSuiteFlag : Boolean;
+{$ifdef WINCE}
+ fs : TFileStream;
+{$endif WINCE}
begin
+{$ifdef WINCE}
+ s := ExtractFilePath(ParamStr(0))+'cldr-log.txt';
+ DeleteFile(s);
+ fs := TFileStream.Create(s,fmCreate);
+ AssignStream(Output,fs);
+ Rewrite(Output);
+ s := ExtractFilePath(ParamStr(0))+'cldr-err.txt';
+ DeleteFile(s);
+ fs := TFileStream.Create(s,fmCreate);
+ AssignStream(ErrOutput,fs);
+ Rewrite(ErrOutput);
+{$endif WINCE}
+{$ifdef WINCE_TEST}
+ testSuiteFlag := True;
+ try
+ exec_tests();
+ except
+ on e : Exception do begin
+ WriteLn('Exception : '+e.Message);
+ raise;
+ end;
+ end;
+ exit;
+{$endif WINCE_TEST}
dataPath := '';
outputPath := '';
collationFileName := '';
@@ -132,10 +166,12 @@ begin
outputPath := dataPath
else
outputPath := IncludeTrailingPathDelimiter(outputPath);
+{$ifndef WINCE_TEST}
if (ParamCount() = 0) then begin
WriteLn(SUsageText);
Halt(1);
end;
+{$endif WINCE_TEST}
if not(
FileExists(dataPath+'UCA_Rules_SHORT.xml') and
FileExists(dataPath+'allkeys.txt')
@@ -155,6 +191,8 @@ begin
stream := nil;
streamNE := nil;
streamOE := nil;
+ binaryStreamNE := nil;
+ binaryStreamOE := nil;
collation := TCldrCollation.Create();
try
ParseCollationDocument(collationFileName,collation,TCldrParserMode.HeaderParsing);
@@ -194,9 +232,12 @@ begin
stream.Clear();
streamNE := TMemoryStream.Create();
streamOE := TMemoryStream.Create();
+ binaryStreamNE := TMemoryStream.Create();
+ binaryStreamOE := TMemoryStream.Create();
s := COLLATION_FILE_PREFIX + ChangeFileExt(LowerCase(ExtractFileName(collationFileName)),'.pas');
GenerateCdlrCollation(
collation,collationTypeName,s,stream,streamNE,streamOE,
+ binaryStreamNE,binaryStreamOE,
orderedChars,ucaBook.Lines
);
stream.SaveToFile(ExtractFilePath(collationFileName)+s);
@@ -204,8 +245,20 @@ begin
streamNE.SaveToFile(ExtractFilePath(collationFileName)+GenerateEndianIncludeFileName(s,ENDIAN_NATIVE));
streamOE.SaveToFile(ExtractFilePath(collationFileName)+GenerateEndianIncludeFileName(s,ENDIAN_NON_NATIVE));
end;
+ if (binaryStreamNE.Size > 0) then begin
+ binaryStreamNE.SaveToFile(
+ ExtractFilePath(collationFileName) +
+ ChangeFileExt(s,Format('_%s.bco',[ENDIAN_SUFFIX[ENDIAN_NATIVE]]))
+ );
+ binaryStreamOE.SaveToFile(
+ ExtractFilePath(collationFileName) +
+ ChangeFileExt(s,Format('_%s.bco',[ENDIAN_SUFFIX[ENDIAN_NON_NATIVE]]))
+ );
+ end;
end;
finally
+ binaryStreamOE.Free();
+ binaryStreamNE.Free();
streamOE.Free();
streamNE.Free();
stream.Free();
diff --git a/utils/unicode/helper.pas b/utils/unicode/helper.pas
index e171a72672..1e10910f5d 100644
--- a/utils/unicode/helper.pas
+++ b/utils/unicode/helper.pas
@@ -487,11 +487,23 @@ const
var AFirstTable : TucaBmpFirstTable;
var ASecondTable : TucaBmpSecondTable
);
+ procedure GenerateBinaryUCA_BmpTables(
+ ANativeEndianStream,
+ ANonNativeEndianStream : TStream;
+ var AFirstTable : TucaBmpFirstTable;
+ var ASecondTable : TucaBmpSecondTable
+ );
procedure GenerateUCA_PropTable(
ADest : TStream;
const APropBook : PUCA_PropBook;
const AEndian : TEndianKind
);
+ procedure GenerateBinaryUCA_PropTable(
+ // WARNING : files must be generated for each endianess (Little / Big)
+ ANativeEndianStream,
+ ANonNativeEndianStream : TStream;
+ const APropBook : PUCA_PropBook
+ );
procedure GenerateUCA_OBmpTables(
AStream,
ANativeEndianStream,
@@ -499,6 +511,12 @@ const
var AFirstTable : TucaOBmpFirstTable;
var ASecondTable : TucaOBmpSecondTable
);
+ procedure GenerateBinaryUCA_OBmpTables(
+ ANativeEndianStream,
+ ANonNativeEndianStream : TStream;
+ var AFirstTable : TucaOBmpFirstTable;
+ var ASecondTable : TucaOBmpSecondTable
+ );
procedure Parse_UnicodeData(
ADataAStream : TMemoryStream;
@@ -611,7 +629,6 @@ const
): PPropRec; inline;overload;
procedure FromUCS4(const AValue : TUnicodeCodePoint; var AHighS, ALowS : Word);inline;
function ToUCS4(const AHighS, ALowS : Word) : TUnicodeCodePoint; inline;
-//--------------------
type
TBitOrder = 0..7;
@@ -640,6 +657,29 @@ type
const ADataLen : Integer
);
+type
+ TCollationName = string[128]; // short string holding at most 128 characters
+ TSerializedCollationHeader = packed record // header written to a binary collation stream before the table data
+ Base : TCollationName; // GenerateCdlrCollation stores the collation item's Base here
+ Version : TCollationName; // version text taken from the collation / UCA book by the generators
+ CollationName : TCollationName; // collation language, or 'DUCET' for the root collation
+ VariableWeight : Byte; // Ord() of the book's VariableWeight value
+ Backwards : Byte; // bits 0..3 are set from the book's Backwards[0..3] flags
+ BMP_Table1Length : DWord; // byte-swapped by ReverseRecordBytes for the other-endian file
+ BMP_Table2Length : DWord; // generators store item count * SizeOf(UInt24); byte-swapped by ReverseRecordBytes
+ OBMP_Table1Length : DWord; // generators store item count * SizeOf(Word); byte-swapped by ReverseRecordBytes
+ OBMP_Table2Length : DWord; // generators store item count * SizeOf(UInt24); byte-swapped by ReverseRecordBytes
+ PropCount : DWord; // generators store the property book's ItemSize here; byte-swapped by ReverseRecordBytes
+ VariableLowLimit : Word; // byte-swapped by ReverseRecordBytes
+ VariableHighLimit : Word; // byte-swapped by ReverseRecordBytes
+ ChangedFields : Byte; // one bit per TCollationField (bit index = Ord of the field); single byte, not byte-swapped
+ end;
+ PSerializedCollationHeader = ^TSerializedCollationHeader;
+
+ procedure ReverseRecordBytes(var AItem : TSerializedCollationHeader);
+ procedure ReverseBytes(var AData; const ALength : Integer);
+ procedure ReverseArray(var AValue; const AArrayLength, AItemSize : PtrInt);
+
resourcestring
SInsufficientMemoryBuffer = 'Insufficient Memory Buffer';
@@ -3294,6 +3334,28 @@ begin
AddLine(ANonNativeEndianStream,' );' + sLineBreak);
end;
+{ Dumps the BMP lookup tables in binary form to both endian-specific streams.
+  The first table is written unchanged to each stream, with Length(AFirstTable)
+  used as the byte count. Every UInt24 cell of the second table is written
+  as is to the native stream and byte-reversed to the non-native stream. }
+procedure GenerateBinaryUCA_BmpTables(
+  ANativeEndianStream,
+  ANonNativeEndianStream : TStream;
+  var AFirstTable : TucaBmpFirstTable;
+  var ASecondTable : TucaBmpSecondTable
+);
+var
+  row, col : Integer;
+  cell : UInt24;
+begin
+  // First table : identical bytes in both files.
+  ANativeEndianStream.Write(AFirstTable[0],Length(AFirstTable));
+  ANonNativeEndianStream.Write(AFirstTable[0],Length(AFirstTable));
+
+  // Second table : cell by cell, in row-major order.
+  for row := Low(ASecondTable) to High(ASecondTable) do
+    for col := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
+      cell := ASecondTable[row][col];
+      ANativeEndianStream.Write(cell,SizeOf(cell));
+      ReverseBytes(cell,SizeOf(cell)); // swap in place for the other endianness
+      ANonNativeEndianStream.Write(cell,SizeOf(cell));
+    end;
+end;
+
procedure GenerateUCA_PropTable(
// WARNING : files must be generated for each endianess (Little / Big)
ADest : TStream;
@@ -3336,6 +3398,17 @@ begin
AddLine(' );' + sLineBreak);
end;
+{ Writes the raw property buffer of APropBook to both streams :
+  Items^ goes to the native-endian stream and ItemsOtherEndian^ to the
+  non-native one; ItemSize is used as the byte count in both cases. }
+procedure GenerateBinaryUCA_PropTable(
+// WARNING : files must be generated for each endianess (Little / Big)
+  ANativeEndianStream,
+  ANonNativeEndianStream : TStream;
+  const APropBook : PUCA_PropBook
+);
+begin
+  with APropBook^ do begin
+    ANativeEndianStream.Write(Items^,ItemSize);
+    ANonNativeEndianStream.Write(ItemsOtherEndian^,ItemSize);
+  end;
+end;
+
procedure GenerateUCA_OBmpTables(
AStream,
ANativeEndianStream,
@@ -3410,7 +3483,34 @@ begin
AddLine(ANonNativeEndianStream,' );' + sLineBreak);
end;
-//-------------------------------------------
+{ Dumps the out-of-BMP lookup tables in binary form to both endian-specific
+  streams.
+    ANativeEndianStream    : receives every value exactly as held in memory.
+    ANonNativeEndianStream : receives the same values with their bytes reversed.
+    AFirstTable            : table of Word entries, written entry by entry.
+    ASecondTable           : table of UInt24 cells, written row by row.
+  The first table is written in full before the second one. }
+procedure GenerateBinaryUCA_OBmpTables(
+  ANativeEndianStream,
+  ANonNativeEndianStream : TStream;
+  var AFirstTable : TucaOBmpFirstTable;
+  var ASecondTable : TucaOBmpSecondTable
+);
+var
+  i, j : Integer;
+  wordValue : Word;
+  value : UInt24;
+begin
+  // First table : one Word per entry.
+  for i := Low(AFirstTable) to High(AFirstTable) do begin
+    wordValue := AFirstTable[i];
+    ANativeEndianStream.Write(wordValue,SizeOf(wordValue));
+    ReverseBytes(wordValue,SizeOf(wordValue)); // swap the local copy only
+    ANonNativeEndianStream.Write(wordValue,SizeOf(wordValue));
+  end;
+
+  // Second table : one UInt24 per cell.
+  for i := Low(ASecondTable) to High(ASecondTable) do begin
+    for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
+      value := ASecondTable[i][j];
+      ANativeEndianStream.Write(value,SizeOf(value));
+      ReverseBytes(value,SizeOf(value)); // swap the local copy only
+      ANonNativeEndianStream.Write(value,SizeOf(value));
+    end;
+  end;
+end;
type
POBmpSecondTableItem = ^TOBmpSecondTableItem;
@@ -4103,6 +4203,17 @@ begin
Result := r;
end;
+{ Converts a serialized collation header to the opposite endianness, in place.
+  Only the multi-byte numeric fields are byte-reversed; the name strings and
+  the single-byte fields are left untouched. }
+procedure ReverseRecordBytes(var AItem : TSerializedCollationHeader);
+begin
+  with AItem do begin
+    // table lengths and property size
+    ReverseBytes(BMP_Table1Length,SizeOf(BMP_Table1Length));
+    ReverseBytes(BMP_Table2Length,SizeOf(BMP_Table2Length));
+    ReverseBytes(OBMP_Table1Length,SizeOf(OBMP_Table1Length));
+    ReverseBytes(OBMP_Table2Length,SizeOf(OBMP_Table2Length));
+    ReverseBytes(PropCount,SizeOf(PropCount));
+    // variable weight limits
+    ReverseBytes(VariableLowLimit,SizeOf(VariableLowLimit));
+    ReverseBytes(VariableHighLimit,SizeOf(VariableHighLimit));
+  end;
+end;
+
procedure ReverseBytes(var AData; const ALength : Integer);
var
i,j : PtrInt;
diff --git a/utils/unicode/unihelper.lpr b/utils/unicode/unihelper.lpr
index 794f901b5d..f1b3de9e2e 100644
--- a/utils/unicode/unihelper.lpr
+++ b/utils/unicode/unihelper.lpr
@@ -31,7 +31,7 @@
program unihelper;
{$mode objfpc}{$H+}
-{$typedadress on}
+{$typedaddress on}
uses
SysUtils, Classes,
@@ -66,6 +66,7 @@ end;
var
dataPath, outputPath : string;
stream, binStreamNE, binStreamOE, tmpStream : TMemoryStream;
+ binaryStreamNE, binaryStreamOE : TMemoryStream;
hangulSyllables : TCodePointRecArray;
ucaBook : TUCA_DataBook;
ucaPropBook : PUCA_PropBook;
@@ -95,6 +96,7 @@ var
ucaoFirstTable : TucaoBmpFirstTable;
ucaoSecondTable : TucaOBmpSecondTable;
WL : Integer;
+ serializedHeader : TSerializedCollationHeader;
begin
WriteLn(SUsage+sLineBreak);
if (ParamCount > 0) then
@@ -125,6 +127,8 @@ begin
Halt(1);
end;
+ binaryStreamNE := nil;
+ binaryStreamOE := nil;
binStreamOE := nil;
binStreamNE := nil;
tmpStream := nil;
@@ -206,6 +210,8 @@ begin
{$IFDEF UCA_TEST}
uca_CheckProp_2y(ucaBook,ucaPropBook,@ucaoFirstTable,@ucaoSecondTable);
{$ENDIF UCA_TEST}
+ binaryStreamNE := TMemoryStream.Create();
+ binaryStreamOE := TMemoryStream.Create();
WriteLn('Generate UCA Props tables ...');
binStreamNE.Clear();
binStreamOE.Clear();
@@ -226,6 +232,37 @@ begin
binStreamOE.SaveToFile(GenerateEndianIncludeFileName(s,ENDIAN_NON_NATIVE));
binStreamNE.Clear();
binStreamOE.Clear();
+// Binary DUCET
+ FillChar(serializedHeader,SizeOf(TSerializedCollationHeader),0);
+ serializedHeader.Version := ucaBook.Version;
+ serializedHeader.CollationName := 'DUCET';//'Default Unicode Collation Element Table (DUCET)';
+ serializedHeader.VariableWeight := Ord(ucaBook.VariableWeight);
+ SetBit(serializedHeader.Backwards,0,ucaBook.Backwards[0]);
+ SetBit(serializedHeader.Backwards,1,ucaBook.Backwards[1]);
+ SetBit(serializedHeader.Backwards,2,ucaBook.Backwards[2]);
+ SetBit(serializedHeader.Backwards,3,ucaBook.Backwards[3]);
+ serializedHeader.BMP_Table1Length := Length(ucaFirstTable);
+ serializedHeader.BMP_Table2Length := Length(TucaBmpSecondTableItem) *
+ (Length(ucaSecondTable) * SizeOf(UInt24));
+ serializedHeader.OBMP_Table1Length := Length(ucaoFirstTable) * SizeOf(Word);
+ serializedHeader.OBMP_Table2Length := Length(TucaOBmpSecondTableItem) *
+ (Length(ucaoSecondTable) * SizeOf(UInt24));
+ serializedHeader.PropCount := ucaPropBook^.ItemSize;
+ serializedHeader.VariableLowLimit := ucaPropBook^.VariableLowLimit;
+ serializedHeader.VariableHighLimit := ucaPropBook^.VariableHighLimit;
+ binaryStreamNE.Write(serializedHeader,SizeOf(serializedHeader));
+ ReverseRecordBytes(serializedHeader);
+ binaryStreamOE.Write(serializedHeader,SizeOf(serializedHeader));
+ GenerateBinaryUCA_BmpTables(binaryStreamNE,binaryStreamOE,ucaFirstTable,ucaSecondTable);
+ GenerateBinaryUCA_OBmpTables(binaryStreamNE,binaryStreamOE,ucaoFirstTable,ucaoSecondTable);
+ GenerateBinaryUCA_PropTable(binaryStreamNE,binaryStreamOE,ucaPropBook);
+ binaryStreamNE.SaveToFile(
+ outputPath + Format('collation_ducet_%s.bco',[ENDIAN_SUFFIX[ENDIAN_NATIVE]])
+ );
+ binaryStreamOE.SaveToFile(
+ outputPath + Format('collation_ducet_%s.bco',[ENDIAN_SUFFIX[ENDIAN_NON_NATIVE]])
+ );
+// Binary DUCET - END
stream.Clear();
@@ -386,6 +423,8 @@ begin
end;
stream.SaveToFile(outputPath + 'diff2.txt');
finally
+ binaryStreamOE.Free();
+ binaryStreamNE.Free();
tmpStream.Free();
binStreamOE.Free();
binStreamNE.Free();